// core/stdarch/crates/core_arch/src/x86/avx512f.rs

use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}

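// Illustrative usage sketch (not part of stdarch; the function name and
// constants are hypothetical): shows how the plain, writemask, and zeromask
// forms of the 32-bit absolute-value intrinsic relate. Bit `i` of `k` selects
// `|a[i]|` when set; otherwise the lane comes from `src` (mask form) or is
// zeroed (maskz form).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn abs_epi32_usage_sketch() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(-3);
    let src = _mm512_set1_epi32(9);
    let k: __mmask16 = 0b0000_0000_1111_1111; // lanes 0..8 selected
    let merged = _mm512_mask_abs_epi32(src, k, a); // lanes 0..8 = 3, lanes 8..16 = 9
    let zeroed = _mm512_maskz_abs_epi32(k, a); // lanes 0..8 = 3, lanes 8..16 = 0
    (merged, zeroed)
}
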
/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

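// Illustrative sketch (hypothetical helper, not part of stdarch): the 256- and
// 128-bit variants require both `avx512f` and `avx512vl`. For a 4-lane vector
// only the low 4 bits of the `__mmask8` are meaningful; higher bits are ignored.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn abs_epi32_vl_sketch() -> __m128i {
    let a = _mm_set1_epi32(-7);
    // Bits 0 and 2 set: lanes 0 and 2 become 7, lanes 1 and 3 are zeroed.
    _mm_maskz_abs_epi32(0b0101, a)
}
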
/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i64x8();
        let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a = a.as_i64x4();
        let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i64x2();
        let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}

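// Illustrative sketch (hypothetical, not part of stdarch): like the `vpabsq`
// instruction these compile to, the 64-bit absolute value uses wrapping
// negation, so `i64::MIN` maps to itself: the "unsigned result"
// 0x8000_0000_0000_0000 reinterpreted as a signed lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn abs_epi64_min_sketch() -> __m512i {
    let a = _mm512_set1_epi64(i64::MIN);
    _mm512_abs_epi64(a) // every lane remains i64::MIN bit-for-bit
}
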
/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

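// Illustrative sketch (hypothetical, not part of stdarch): the floating-point
// absolute value only clears the sign bit (hence the `vpandd`/`vpandq` codegen
// asserted above), so `-0.0` becomes `+0.0` and a NaN keeps its payload with
// the sign bit cleared; no arithmetic is performed on the lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn abs_ps_sign_bit_sketch() -> __m512 {
    let v2 = _mm512_set1_ps(-0.0);
    _mm512_abs_ps(v2) // all lanes are +0.0
}
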
/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}

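// Illustrative sketch (hypothetical, not part of stdarch): `mask_mov` performs
// no arithmetic; it is a pure per-lane blend driven by the mask bits, which
// makes it a building block for merge-masking arbitrary intermediate results.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mov_epi32_blend_sketch() -> __m512i {
    let a = _mm512_set1_epi32(1);
    let src = _mm512_set1_epi32(2);
    // Alternating mask: even lanes take `a`, odd lanes keep `src`.
    _mm512_mask_mov_epi32(src, 0b0101_0101_0101_0101, a)
}
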
/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}

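// Illustrative sketch (hypothetical, not part of stdarch): the floating-point
// move variants have the same blend semantics as the integer ones; lane bit
// patterns are copied unchanged, so NaNs and signed zeros pass through as-is.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mov_pd_blend_sketch() -> __m512d {
    let a = _mm512_set1_pd(1.0);
    let src = _mm512_set1_pd(f64::NAN);
    // Low four lanes take 1.0; high four keep the NaN bit pattern from `src`.
    _mm512_mask_mov_pd(src, 0b0000_1111, a)
}
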
/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}

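// Illustrative sketch (hypothetical, not part of stdarch): `simd_add` on
// integer lanes wraps on overflow, matching the `vpaddd` instruction, so
// adding 1 to `i32::MAX` yields `i32::MIN` rather than panicking.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn add_epi32_wrapping_sketch() -> __m512i {
    let a = _mm512_set1_epi32(i32::MAX);
    let b = _mm512_set1_epi32(1);
    _mm512_add_epi32(a, b) // every lane is i32::MIN
}
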
/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
    }
}

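// Illustrative sketch (hypothetical, not part of stdarch): the merge-masked
// floating-point forms compute the full-width sum and then blend it with
// `src` per mask bit, which is handy for accumulating only selected lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn add_pd_masked_sketch() -> __m512d {
    let a = _mm512_set1_pd(1.5);
    let b = _mm512_set1_pd(2.5);
    let src = _mm512_set1_pd(-1.0);
    // Lanes 0..4 = 4.0; lanes 4..8 keep -1.0 from `src`.
    _mm512_mask_add_pd(src, 0b0000_1111, a, b)
}
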
/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
1152#[inline]
1153#[target_feature(enable = "avx512f,avx512vl")]
1154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1155#[cfg_attr(test, assert_instr(vpsubd))]
1156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1157pub const fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1158    unsafe {
1159        let sub = _mm_sub_epi32(a, b).as_i32x4();
1160        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
1161    }
1162}
1163
1164/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1165///
1166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
1167#[inline]
1168#[target_feature(enable = "avx512f,avx512vl")]
1169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1170#[cfg_attr(test, assert_instr(vpsubd))]
1171#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1172pub const fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1173    unsafe {
1174        let sub = _mm_sub_epi32(a, b).as_i32x4();
1175        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
1176    }
1177}
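
// Illustrative sketch, not part of the original source: lane arithmetic in
// these integer subtracts wraps on overflow, and the zeromask selects per
// lane. `sub_epi32_demo` is hypothetical and requires AVX-512F at runtime
// (UB otherwise).
//
// #[target_feature(enable = "avx512f")]
// unsafe fn sub_epi32_demo() {
//     use core::arch::x86_64::*;
//     let a = _mm512_set1_epi32(i32::MIN);
//     let b = _mm512_set1_epi32(1);
//     // Wrapping semantics: i32::MIN - 1 == i32::MAX in every lane.
//     let r = _mm512_sub_epi32(a, b);
//     // With k = 0x00FF the low 8 lanes hold i32::MAX, the high 8 are zeroed.
//     let rz = _mm512_maskz_sub_epi32(0x00FF, a, b);
// }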

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let sub = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let sub = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let sub = _mm256_sub_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let sub = _mm256_sub_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let sub = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let sub = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let sub = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let sub = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let sub = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let sub = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let sub = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
    }
}

/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let sub = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        // The truncating cast to `i32x8` keeps the low 32 bits of each 64-bit
        // lane; casting back to `i64x8` sign-extends them, so `simd_mul`
        // computes full 64-bit signed products.
        let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
        let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
        transmute(simd_mul(a, b))
    }
}
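
// Illustrative sketch, not part of the original source: which bits of each
// 64-bit lane participate in `_mm512_mul_epi32`. `mul_epi32_demo` is
// hypothetical and requires AVX-512F at runtime (UB otherwise).
//
// #[target_feature(enable = "avx512f")]
// unsafe fn mul_epi32_demo() {
//     use core::arch::x86_64::*;
//     // The low 32 bits of every lane are 0xFFFF_FFFF, i.e. -1 as a signed
//     // value; the high 32 bits are ignored by the multiply.
//     let a = _mm512_set1_epi64(0x0000_0001_FFFF_FFFF);
//     let b = _mm512_set1_epi64(3);
//     let r = _mm512_mul_epi32(a, b); // every 64-bit lane == -3
// }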

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
}
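
// Illustrative sketch, not part of the original source: `mullo` keeps only
// the low 32 bits of each 64-bit intermediate product. `mullo_demo` is
// hypothetical and requires AVX-512F at runtime (UB otherwise).
//
// #[target_feature(enable = "avx512f")]
// unsafe fn mullo_demo() {
//     use core::arch::x86_64::*;
//     let a = _mm512_set1_epi32(1 << 16);
//     // 2^16 * 2^16 == 2^32, whose low 32 bits are 0, so every lane is 0.
//     let r = _mm512_mullo_epi32(a, a);
// }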

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mullo_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
    }
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
}
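
// Illustrative sketch, not part of the original source: unlike the intrinsics
// above, `_mm512_mullox_epi64` has no single-instruction form under plain
// AVX-512F (a native vpmullq requires AVX-512DQ), which is why it carries the
// performance note and no `assert_instr`. `mullox_demo` is hypothetical and
// requires AVX-512F at runtime (UB otherwise).
//
// #[target_feature(enable = "avx512f")]
// unsafe fn mullox_demo() {
//     use core::arch::x86_64::*;
//     let a = _mm512_set1_epi64(1 << 40);
//     let b = _mm512_set1_epi64(1 << 30);
//     // 2^70 wraps modulo 2^64 to 2^6, so every lane holds 64.
//     let r = _mm512_mullox_epi64(a, b);
// }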

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mullox_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let mul = _mm512_mullox_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_u64x8();
        let b = b.as_u64x8();
        // Clear the high half of each 64-bit lane so the multiply sees only
        // the zero-extended low 32 bits.
        let mask = u64x8::splat(u32::MAX as u64);
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}
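
// Illustrative sketch, not part of the original source: the epu32 variant
// zero-extends the low halves, so 0xFFFF_FFFF multiplies as 2^32 - 1 rather
// than as -1 (contrast `_mm512_mul_epi32` above). `mul_epu32_demo` is
// hypothetical and requires AVX-512F at runtime (UB otherwise).
//
// #[target_feature(enable = "avx512f")]
// unsafe fn mul_epu32_demo() {
//     use core::arch::x86_64::*;
//     let a = _mm512_set1_epi64(u32::MAX as i64);
//     // (2^32 - 1)^2 == 0xFFFF_FFFE_0000_0001 in every 64-bit lane.
//     let r = _mm512_mul_epu32(a, a);
// }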

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
    }
}
2084
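// Sketch of the 512-bit division above (illustrative only): `_mm512_div_ps`
// divides all 16 lanes, and the masked forms then blend per `k`.
//
//     let a = _mm512_set1_ps(9.0);
//     let b = _mm512_set1_ps(3.0);
//     let q = _mm512_div_ps(a, b); // every lane is 3.0
//     // Keep only the even lanes; odd lanes become 0.0.
//     let q_even = _mm512_maskz_div_ps(0b0101_0101_0101_0101, a, b);
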
/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
    }
}

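// Illustrative contrast of the two masked double-precision divisions above
// (not part of the upstream file):
//
//     let a = _mm512_set1_pd(1.0);
//     let b = _mm512_set1_pd(8.0);
//     let src = _mm512_set1_pd(7.0);
//     let m = _mm512_mask_div_pd(src, 0b0000_1111, a, b); // low 4 lanes 0.125, high 4 lanes 7.0
//     let z = _mm512_maskz_div_pd(0b0000_1111, a, b); // low 4 lanes 0.125, high 4 lanes 0.0
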
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
    }
}

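// These intrinsics are only safe to execute on CPUs that support the listed
// target features. A minimal runtime-dispatch sketch (illustrative; the helper
// name `div4` is hypothetical, and `is_x86_feature_detected!` lives in std):
//
//     fn div4(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> Option<__m256d> {
//         if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
//             // SAFETY: the required target features were just detected.
//             Some(unsafe { _mm256_mask_div_pd(src, k, a, b) })
//         } else {
//             None
//         }
//     }
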
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imax(a.as_i32x16(), b.as_i32x16()).as_m512i() }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
    }
}

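// Mask-bit ordering sketch (illustrative): bit i of the `__mmask16` governs
// lane i, with bit 0 the least significant bit.
//
//     let a = _mm512_set1_epi32(5);
//     let b = _mm512_set1_epi32(9);
//     // Only lane 0 receives max(5, 9) = 9; lanes 1..16 are zeroed.
//     let r = _mm512_maskz_max_epi32(0x0001, a, b);
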
/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imax(a.as_i64x8(), b.as_i64x8()).as_m512i() }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, max, i64x8::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_i64x4(), b.as_i64x4()).as_m256i() }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, max, src.as_i64x4()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, max, i64x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imax(a.as_i64x2(), b.as_i64x2()).as_m128i() }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, max, src.as_i64x2()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, max, i64x2::ZERO))
    }
}

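// Note: unlike the 32-bit integer max, packed 64-bit max has no SSE/AVX2
// counterpart; `vpmaxsq` exists only under AVX-512. Illustrative use:
//
//     let a = _mm_set_epi64x(-1, 10); // lanes: [10, -1]
//     let b = _mm_set_epi64x(2, 3); // lanes: [3, 2]
//     let r = _mm_max_epi64(a, b); // lanes: [10, 2]
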
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vmaxps(
            a.as_f32x16(),
            b.as_f32x16(),
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let max = _mm512_max_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, max, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let max = _mm512_max_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, max, f32x16::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let max = _mm256_max_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, max, src.as_f32x8()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let max = _mm256_max_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, max, f32x8::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let max = _mm_max_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, max, src.as_f32x4()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let max = _mm_max_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, max, f32x4::ZERO))
    }
}

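// NaN-handling sketch (illustrative): VMAXPS does not compute the IEEE 754
// maxNum; when either input is NaN it returns the second operand.
//
//     let a = _mm512_set1_ps(f32::NAN);
//     let b = _mm512_set1_ps(1.0);
//     let r = _mm512_max_ps(a, b); // every lane is 1.0, not NaN
//     let r2 = _mm512_max_ps(b, a); // every lane is NaN
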
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let max = _mm512_max_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, max, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let max = _mm512_max_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, max, f64x8::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let max = _mm256_max_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, max, src.as_f64x4()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let max = _mm256_max_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, max, f64x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let max = _mm_max_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, max, src.as_f64x2()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let max = _mm_max_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, max, f64x2::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imax(a.as_u32x16(), b.as_u32x16()).as_m512i() }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, max, src.as_u32x16()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, max, u32x16::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, max, src.as_u32x8()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, max, u32x8::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, max, src.as_u32x4()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, max, u32x4::ZERO))
    }
}

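// Signed vs. unsigned sketch (illustrative): the same bit pattern orders
// differently under `vpmaxsd` and `vpmaxud`.
//
//     let a = _mm_set1_epi32(-1); // 0xFFFF_FFFF
//     let b = _mm_set1_epi32(0);
//     let s = _mm_max_epi32(a, b); // signed: every lane is 0
//     let u = _mm_max_epu32(a, b); // unsigned: every lane is 0xFFFF_FFFF
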
/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imax(a.as_u64x8(), b.as_u64x8()).as_m512i() }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, max, src.as_u64x8()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max = _mm512_max_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, max, u64x8::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imax(a.as_u64x4(), b.as_u64x4()).as_m256i() }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, max, src.as_u64x4()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max = _mm256_max_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, max, u64x4::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imax(a.as_u64x2(), b.as_u64x2()).as_m128i() }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, max, src.as_u64x2()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max = _mm_max_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, max, u64x2::ZERO))
    }
}

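// As with the signed 64-bit case, packed unsigned 64-bit max (`vpmaxuq`) has
// no pre-AVX-512 equivalent. Illustrative use:
//
//     let a = _mm_set1_epi64x(-1); // the u64::MAX bit pattern
//     let b = _mm_set1_epi64x(1);
//     let r = _mm_max_epu64(a, b); // both lanes keep u64::MAX
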
/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imin(a.as_i32x16(), b.as_i32x16()).as_m512i() }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, min, src.as_i32x16()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, min, i32x16::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, min, src.as_i32x8()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, min, i32x8::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, min, src.as_i32x4()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, min, i32x4::ZERO))
    }
}

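// Illustrative sketch of the masked minimums above (not upstream code):
//
//     let a = _mm_set_epi32(4, 3, 2, 1); // lanes: [1, 2, 3, 4]
//     let b = _mm_set1_epi32(2);
//     let src = _mm_set1_epi32(100);
//     // k = 0b0011: lanes 0 and 1 take the minimum, lanes 2 and 3 copy src.
//     let r = _mm_mask_min_epi32(src, 0b0011, a, b); // lanes: [1, 2, 100, 100]
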
3001/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
3002///
3003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
3004#[inline]
3005#[target_feature(enable = "avx512f")]
3006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3007#[cfg_attr(test, assert_instr(vpminsq))]
3008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3009pub const fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
3010    unsafe { simd_imin(a.as_i64x8(), b.as_i64x8()).as_m512i() }
3011}
3012
3013/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3014///
3015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
3016#[inline]
3017#[target_feature(enable = "avx512f")]
3018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3019#[cfg_attr(test, assert_instr(vpminsq))]
3020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3021pub const fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3022    unsafe {
3023        let min = _mm512_min_epi64(a, b).as_i64x8();
3024        transmute(simd_select_bitmask(k, min, src.as_i64x8()))
3025    }
3026}
3027
3028/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3029///
3030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
3031#[inline]
3032#[target_feature(enable = "avx512f")]
3033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3034#[cfg_attr(test, assert_instr(vpminsq))]
3035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3036pub const fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3037    unsafe {
3038        let min = _mm512_min_epi64(a, b).as_i64x8();
3039        transmute(simd_select_bitmask(k, min, i64x8::ZERO))
3040    }
3041}
3042
/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_i64x4(), b.as_i64x4()).as_m256i() }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, min, src.as_i64x4()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, min, i64x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imin(a.as_i64x2(), b.as_i64x2()).as_m128i() }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, min, src.as_i64x2()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminsq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, min, i64x2::ZERO))
    }
}

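// Illustrative usage sketch for the masked signed 64-bit minimum above; it is
// not part of this file's real test suite. The module name, function name, and
// lane values are invented for the example, and the `assert_eq_m256i` helper
// from this crate's test support is assumed to be in scope via the glob import.
#[cfg(test)]
mod min_epi64_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn sketch_mm256_mask_min_epi64() {
        // `_mm256_set_epi64x` lists lanes from highest (e3) down to lowest (e0).
        let a = _mm256_set_epi64x(3, -1, 7, 2);
        let b = _mm256_set_epi64x(-4, 5, 6, -9);
        let src = _mm256_set1_epi64x(100);
        // Mask bits 0 and 2 are set, so lanes 0 and 2 take min(a, b) while
        // lanes 1 and 3 are copied from `src`.
        let r = _mm256_mask_min_epi64(src, 0b0101, a, b);
        let e = _mm256_set_epi64x(100, -1, 100, -9);
        assert_eq_m256i(r, e);
    }
}
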
/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vminps(
            a.as_f32x16(),
            b.as_f32x16(),
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let min = _mm512_min_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, min, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let min = _mm512_min_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, min, f32x16::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let min = _mm256_min_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, min, src.as_f32x8()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let min = _mm256_min_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, min, f32x8::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let min = _mm_min_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, min, src.as_f32x4()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps))]
pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let min = _mm_min_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, min, f32x4::ZERO))
    }
}

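// A minimal sketch of the zero-masked single-precision minimum, mirroring the
// style of this file's test module; the module name, function name, and values
// are illustrative only, and the `assert_eq_m512` helper from the crate's test
// support is assumed to be in scope.
#[cfg(test)]
mod min_ps_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn sketch_mm512_maskz_min_ps() {
        let a = _mm512_set1_ps(2.0);
        let b = _mm512_set1_ps(-1.0);
        // Only the low eight mask bits are set: those lanes hold the minimum,
        // while the upper eight lanes are zeroed.
        let r = _mm512_maskz_min_ps(0x00ff, a, b);
        let e = _mm512_setr_ps(
            -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        );
        assert_eq_m512(r, e);
    }
}
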
/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let min = _mm512_min_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, min, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let min = _mm512_min_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, min, f64x8::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let min = _mm256_min_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, min, src.as_f64x4()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let min = _mm256_min_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, min, f64x4::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let min = _mm_min_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, min, src.as_f64x2()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd))]
pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let min = _mm_min_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, min, f64x2::ZERO))
    }
}

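// Illustrative sketch for the write-masked double-precision minimum; not part
// of the real test suite. Names and lane values are invented, and the
// `assert_eq_m128d` test helper is assumed available via the glob import.
#[cfg(test)]
mod min_pd_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn sketch_mm_mask_min_pd() {
        // `_mm_set_pd` lists the high lane (e1) first, then the low lane (e0).
        let a = _mm_set_pd(4.0, -2.0);
        let b = _mm_set_pd(1.0, 3.0);
        let src = _mm_set1_pd(9.0);
        // Only bit 0 is set: lane 0 takes min(-2.0, 3.0), lane 1 copies `src`.
        let r = _mm_mask_min_pd(src, 0b01, a, b);
        let e = _mm_set_pd(9.0, -2.0);
        assert_eq_m128d(r, e);
    }
}
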
/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imin(a.as_u32x16(), b.as_u32x16()).as_m512i() }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, min, src.as_u32x16()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu32(a, b).as_u32x16();
        transmute(simd_select_bitmask(k, min, u32x16::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
    }
}

/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminud))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
    }
}

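// A short sketch showing why the unsigned minimum differs from the signed one:
// an all-ones bit pattern is u32::MAX under unsigned comparison, not -1. The
// module and function names are invented; `assert_eq_m512i` is assumed in
// scope from the crate's test helpers.
#[cfg(test)]
mod min_epu32_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn sketch_mm512_min_epu32() {
        let a = _mm512_set1_epi32(-1); // every lane is 0xFFFF_FFFF = u32::MAX
        let b = _mm512_set1_epi32(1);
        // Unsigned comparison picks 1, where a signed minimum would pick -1.
        let r = _mm512_min_epu32(a, b);
        assert_eq_m512i(r, b);
    }
}
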
/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_imin(a.as_u64x8(), b.as_u64x8()).as_m512i() }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let min = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { simd_imin(a.as_u64x4(), b.as_u64x4()).as_m256i() }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let min = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { simd_imin(a.as_u64x2(), b.as_u64x2()).as_m128i() }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
    }
}

/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpminuq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let min = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
    }
}

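// Illustrative sketch combining the unsigned 64-bit minimum with a zeromask;
// names and values are invented, and `assert_eq_m128i` is assumed in scope
// from the crate's test helpers.
#[cfg(test)]
mod min_epu64_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn sketch_mm_maskz_min_epu64() {
        // `_mm_set_epi64x` lists the high lane (e1) first, then the low (e0);
        // the -1 bit pattern in lane 1 reads as u64::MAX unsigned.
        let a = _mm_set_epi64x(-1, 10);
        let b = _mm_set_epi64x(7, 20);
        // Only bit 1 is set: lane 1 takes min(u64::MAX, 7) = 7, lane 0 is zeroed.
        let r = _mm_maskz_min_epu64(0b10, a, b);
        let e = _mm_set_epi64x(7, 0);
        assert_eq_m128i(r, e);
    }
}
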
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
    unsafe { simd_fsqrt(a) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
}

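// A minimal sketch of the write-masked square root; names and values are
// invented, and `assert_eq_m512` is assumed in scope from the crate's test
// helpers.
#[cfg(test)]
mod sqrt_ps_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn sketch_mm512_mask_sqrt_ps() {
        let a = _mm512_set1_ps(16.0);
        let src = _mm512_set1_ps(-1.0);
        // The low eight lanes get sqrt(16.0) = 4.0; the rest copy `src`.
        let r = _mm512_mask_sqrt_ps(src, 0x00ff, a);
        let e = _mm512_setr_ps(
            4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0,
        );
        assert_eq_m512(r, e);
    }
}
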
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
    unsafe { simd_fsqrt(a) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
}

/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
}

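// Illustrative sketch of the zero-masked double-precision square root; names
// and values are invented, and `assert_eq_m256d` is assumed in scope from the
// crate's test helpers.
#[cfg(test)]
mod sqrt_pd_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn sketch_mm256_maskz_sqrt_pd() {
        let a = _mm256_setr_pd(1.0, 4.0, 9.0, 16.0);
        // Bits 0 and 3 are set, so lanes 1 and 2 are zeroed.
        let r = _mm256_maskz_sqrt_pd(0b1001, a);
        let e = _mm256_setr_pd(1.0, 0.0, 0.0, 4.0);
        assert_eq_m256d(r, e);
    }
}
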
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
}

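// Illustrative sketch distinguishing the `mask3` fused multiply-add variant,
// which copies unselected lanes from `c` rather than from `a`; names and
// values are invented, and `assert_eq_m512` is assumed in scope from the
// crate's test helpers.
#[cfg(test)]
mod fmadd_ps_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn sketch_mm512_mask3_fmadd_ps() {
        let a = _mm512_set1_ps(2.0);
        let b = _mm512_set1_ps(3.0);
        let c = _mm512_set1_ps(1.0);
        // Low eight lanes: 2.0 * 3.0 + 1.0 = 7.0; upper eight lanes copy `c`.
        let r = _mm512_mask3_fmadd_ps(a, b, c, 0x00ff);
        let e = _mm512_setr_ps(
            7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        );
        assert_eq_m512(r, e);
    }
}
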
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
}

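// A minimal sketch of the write-masked double-precision fused multiply-add,
// where unselected lanes are copied from `a`; names and values are invented,
// and `assert_eq_m128d` is assumed in scope from the crate's test helpers.
#[cfg(test)]
mod fmadd_pd_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn sketch_mm_mask_fmadd_pd() {
        let a = _mm_set1_pd(2.0);
        let b = _mm_set1_pd(5.0);
        let c = _mm_set1_pd(1.0);
        // Only bit 0 is set: lane 0 = 2.0 * 5.0 + 1.0 = 11.0; lane 1 copies `a`.
        let r = _mm_mask_fmadd_pd(a, 0b01, b, c);
        let e = _mm_set_pd(2.0, 11.0);
        assert_eq_m128d(r, e);
    }
}
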
3945/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3946///
3947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
3948#[inline]
3949#[target_feature(enable = "avx512f")]
3950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3951#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3953pub const fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3954    unsafe { simd_fma(a, b, simd_neg(c)) }
3955}
3956
3957/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3958///
3959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3960#[inline]
3961#[target_feature(enable = "avx512f")]
3962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3963#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3965pub const fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
3966    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3967}
3968
3969/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3970///
3971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3972#[inline]
3973#[target_feature(enable = "avx512f")]
3974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3975#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3977pub const fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
3978    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3979}
3980
3981/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3982///
3983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3984#[inline]
3985#[target_feature(enable = "avx512f")]
3986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3987#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
3988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
3989pub const fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
3990    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3991}
3992
3993/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3994///
3995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3996#[inline]
3997#[target_feature(enable = "avx512f,avx512vl")]
3998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3999#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4001pub const fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4002    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
4003}
4004
4005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4006///
4007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
4008#[inline]
4009#[target_feature(enable = "avx512f,avx512vl")]
4010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4011#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
4012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4013pub const fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4014    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
4015}
4016
4017/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4018///
4019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
4020#[inline]
4021#[target_feature(enable = "avx512f,avx512vl")]
4022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4023#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
4024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4025pub const fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4026    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
4027}
4028
4029/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4030///
4031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
4032#[inline]
4033#[target_feature(enable = "avx512f,avx512vl")]
4034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4035#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
4036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4037pub const fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4038    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
4039}
4040
4041/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4042///
4043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
4044#[inline]
4045#[target_feature(enable = "avx512f,avx512vl")]
4046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4047#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
4048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4049pub const fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4050    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
4051}
4052
4053/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4054///
4055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
4056#[inline]
4057#[target_feature(enable = "avx512f,avx512vl")]
4058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4059#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
4060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4061pub const fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4062    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
4063}
4064
4065/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
4066///
4067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
4068#[inline]
4069#[target_feature(enable = "avx512f")]
4070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4071#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4073pub const fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4074    unsafe { simd_fma(a, b, simd_neg(c)) }
4075}
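
// Like the `_ps` variant, this lowers to a single fused operation: each lane
// computes `a * b - c` with one rounding, which is what
// `simd_fma(a, b, simd_neg(c))` expresses (negation is exact, so no extra
// rounding is introduced). A scalar sketch of the identity, using
// `f64::mul_add` as the scalar fused analogue (illustrative only):
//
//     let (a, b, c) = (2.0f64, 3.0f64, 1.0f64);
//     // Exact inputs, so fused and unfused agree; with inexact products the
//     // fused form rounds once and can differ in the last bit.
//     assert_eq!(a.mul_add(b, -c), a * b - c); // both are 5.0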
4076
4077/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4078///
4079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
4080#[inline]
4081#[target_feature(enable = "avx512f")]
4082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4083#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4085pub const fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4086    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
4087}
4088
4089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4090///
4091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
4092#[inline]
4093#[target_feature(enable = "avx512f")]
4094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4095#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4097pub const fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4098    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
4099}
4100
4101/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4102///
4103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
4104#[inline]
4105#[target_feature(enable = "avx512f")]
4106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4107#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4109pub const fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4110    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
4111}
4112
4113/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4114///
4115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
4116#[inline]
4117#[target_feature(enable = "avx512f,avx512vl")]
4118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4119#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4121pub const fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4122    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
4123}
4124
4125/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4126///
4127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
4128#[inline]
4129#[target_feature(enable = "avx512f,avx512vl")]
4130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4131#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4133pub const fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4134    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
4135}
4136
4137/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4138///
4139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
4140#[inline]
4141#[target_feature(enable = "avx512f,avx512vl")]
4142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4143#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4145pub const fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4146    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
4147}
4148
4149/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4150///
4151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
4152#[inline]
4153#[target_feature(enable = "avx512f,avx512vl")]
4154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4155#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4157pub const fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4158    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4167#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4169pub const fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4170    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
4171}
4172
4173/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4174///
4175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
4176#[inline]
4177#[target_feature(enable = "avx512f,avx512vl")]
4178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4179#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd; clang generates vfmadd, gcc generates vfmsub
4180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4181pub const fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4182    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
4183}
4184
4185/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4186///
4187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
4188#[inline]
4189#[target_feature(enable = "avx512f")]
4190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4191#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4193pub const fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4194    unsafe {
4195        let add = simd_fma(a, b, c);
4196        let sub = simd_fma(a, b, simd_neg(c));
4197        simd_shuffle!(
4198            add,
4199            sub,
4200            [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
4201        )
4202    }
4203}
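
// How the interleave works: `simd_shuffle!` indices 0..=15 select lanes of the
// first operand (`add`) and 16..=31 select lanes of the second (`sub`), so the
// index vector [16, 1, 18, 3, ...] places `sub` results in even lanes and
// `add` results in odd lanes, matching vfmaddsub's alternation. Sketch of the
// resulting lane values (illustrative only, inside an AVX-512F-enabled
// `unsafe` context):
//
//     let (a, b, c) = (_mm512_set1_ps(2.0), _mm512_set1_ps(3.0), _mm512_set1_ps(1.0));
//     let mut out = [0.0f32; 16];
//     _mm512_storeu_ps(out.as_mut_ptr(), _mm512_fmaddsub_ps(a, b, c));
//     assert_eq!(out[0], 5.0); // even lane: 2*3 - 1
//     assert_eq!(out[1], 7.0); // odd lane:  2*3 + 1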
4204
4205/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4208#[inline]
4209#[target_feature(enable = "avx512f")]
4210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4213pub const fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4214    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4215}
4216
4217/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4218///
4219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4220#[inline]
4221#[target_feature(enable = "avx512f")]
4222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4223#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4224#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4225pub const fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4226    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4227}
4228
4229/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4230///
4231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4232#[inline]
4233#[target_feature(enable = "avx512f")]
4234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4235#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4236#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4237pub const fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4238    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4239}
4240
4241/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4242///
4243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4244#[inline]
4245#[target_feature(enable = "avx512f,avx512vl")]
4246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4247#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4248#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4249pub const fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4250    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4251}
4252
4253/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4254///
4255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4256#[inline]
4257#[target_feature(enable = "avx512f,avx512vl")]
4258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4259#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4260#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4261pub const fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4262    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4263}
4264
4265/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4266///
4267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4268#[inline]
4269#[target_feature(enable = "avx512f,avx512vl")]
4270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4271#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4272#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4273pub const fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4274    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4275}
4276
4277/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4278///
4279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4280#[inline]
4281#[target_feature(enable = "avx512f,avx512vl")]
4282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4283#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4284#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4285pub const fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4286    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4287}
4288
4289/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4290///
4291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
4292#[inline]
4293#[target_feature(enable = "avx512f,avx512vl")]
4294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4295#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4296#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4297pub const fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4298    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4307#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4308#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4309pub const fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4310    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4311}
4312
4313/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4314///
4315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
4316#[inline]
4317#[target_feature(enable = "avx512f")]
4318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4319#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4320#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4321pub const fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4322    unsafe {
4323        let add = simd_fma(a, b, c);
4324        let sub = simd_fma(a, b, simd_neg(c));
4325        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4326    }
4327}
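
// Same construction as the 16-lane `_ps` version above, scaled to 8 double
// lanes: shuffle indices 8..=15 select from `sub`, so [8, 1, 10, 3, 12, 5, 14, 7]
// subtracts in even lanes and adds in odd lanes.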
4328
4329/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4330///
4331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
4332#[inline]
4333#[target_feature(enable = "avx512f")]
4334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4335#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4336#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4337pub const fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4338    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4339}
4340
4341/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4342///
4343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4344#[inline]
4345#[target_feature(enable = "avx512f")]
4346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4347#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4348#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4349pub const fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4350    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4351}
4352
4353/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4354///
4355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2601)
4356#[inline]
4357#[target_feature(enable = "avx512f")]
4358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4359#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4360#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4361pub const fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4362    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
4363}
4364
4365/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4366///
4367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4368#[inline]
4369#[target_feature(enable = "avx512f,avx512vl")]
4370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4371#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4372#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4373pub const fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4374    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4375}
4376
4377/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4378///
4379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4380#[inline]
4381#[target_feature(enable = "avx512f,avx512vl")]
4382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4383#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4384#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4385pub const fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4386    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4387}
4388
4389/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4390///
4391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4392#[inline]
4393#[target_feature(enable = "avx512f,avx512vl")]
4394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4395#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4396#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4397pub const fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4398    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
4399}
4400
4401/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4402///
4403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4404#[inline]
4405#[target_feature(enable = "avx512f,avx512vl")]
4406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4407#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4408#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4409pub const fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4410    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
4411}
4412
4413/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4414///
4415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4416#[inline]
4417#[target_feature(enable = "avx512f,avx512vl")]
4418#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4419#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4420#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4421pub const fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4422    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
4423}
4424
4425/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4426///
4427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4428#[inline]
4429#[target_feature(enable = "avx512f,avx512vl")]
4430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4431#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4432#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4433pub const fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4434    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
4435}
4436
4437/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
4440#[inline]
4441#[target_feature(enable = "avx512f")]
4442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4444#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4445pub const fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4446    unsafe {
4447        let add = simd_fma(a, b, c);
4448        let sub = simd_fma(a, b, simd_neg(c));
4449        simd_shuffle!(
4450            add,
4451            sub,
4452            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4453        )
4454    }
4455}
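
// `fmsubadd` is the mirror image of `fmaddsub`: the index vector
// [0, 17, 2, 19, ...] keeps `add` (indices < 16) in the even lanes and takes
// `sub` (indices >= 16) in the odd lanes, so even lanes compute `a*b + c` and
// odd lanes compute `a*b - c`. With the inputs from the `fmaddsub` sketch
// above, lane 0 would be 7.0 (2*3 + 1) and lane 1 would be 5.0 (2*3 - 1).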
4456
4457/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4458///
4459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4460#[inline]
4461#[target_feature(enable = "avx512f")]
4462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4463#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4464#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4465pub const fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4466    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
4467}
4468
4469/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4470///
4471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
4472#[inline]
4473#[target_feature(enable = "avx512f")]
4474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4475#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4476#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4477pub const fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4478    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4487#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4488#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4489pub const fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4490    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
4491}
4492
4493/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4494///
4495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4496#[inline]
4497#[target_feature(enable = "avx512f,avx512vl")]
4498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4499#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4500#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4501pub const fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4502    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
4503}
4504
4505/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4506///
4507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4508#[inline]
4509#[target_feature(enable = "avx512f,avx512vl")]
4510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4511#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4512#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4513pub const fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4514    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
4515}
4516
4517/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4518///
4519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4520#[inline]
4521#[target_feature(enable = "avx512f,avx512vl")]
4522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4523#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4524#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4525pub const fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4526    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
4527}
4528
4529/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4530///
4531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4532#[inline]
4533#[target_feature(enable = "avx512f,avx512vl")]
4534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4535#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4537pub const fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4538    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
4539}
4540
4541/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4542///
4543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4544#[inline]
4545#[target_feature(enable = "avx512f,avx512vl")]
4546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4547#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4548#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4549pub const fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4550    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
4551}
4552
4553/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4554///
4555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4556#[inline]
4557#[target_feature(enable = "avx512f,avx512vl")]
4558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4559#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4561pub const fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4562    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
4563}
4564
4565/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4566///
4567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4568#[inline]
4569#[target_feature(enable = "avx512f")]
4570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4571#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4572#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4573pub const fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4574    unsafe {
4575        let add = simd_fma(a, b, c);
4576        let sub = simd_fma(a, b, simd_neg(c));
4577        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4578    }
4579}
4580
4581/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4582///
4583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4584#[inline]
4585#[target_feature(enable = "avx512f")]
4586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4587#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4588#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4589pub const fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4590    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
4591}
4592
4593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4594///
4595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4596#[inline]
4597#[target_feature(enable = "avx512f")]
4598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4599#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4600#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4601pub const fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4602    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
4603}
4604
4605/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4606///
4607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4608#[inline]
4609#[target_feature(enable = "avx512f")]
4610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4611#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4612#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4613pub const fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4614    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
4615}
4616
4617/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4618///
4619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4620#[inline]
4621#[target_feature(enable = "avx512f,avx512vl")]
4622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4623#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4624#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4625pub const fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4626    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
4627}
4628
4629/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4630///
4631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4632#[inline]
4633#[target_feature(enable = "avx512f,avx512vl")]
4634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4635#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4636#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4637pub const fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4638    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
4639}
4640
4641/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4642///
4643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4644#[inline]
4645#[target_feature(enable = "avx512f,avx512vl")]
4646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4647#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4648#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4649pub const fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4650    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
4651}
4652
4653/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4654///
4655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4656#[inline]
4657#[target_feature(enable = "avx512f,avx512vl")]
4658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4659#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4660#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4661pub const fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4662    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
4663}
4664
4665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4666///
4667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4668#[inline]
4669#[target_feature(enable = "avx512f,avx512vl")]
4670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4671#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4672#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4673pub const fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4674    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
4675}
4676
4677/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4678///
4679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4680#[inline]
4681#[target_feature(enable = "avx512f,avx512vl")]
4682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4683#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4685pub const fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4686    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
4687}
4688
4689/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4690///
4691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
4692#[inline]
4693#[target_feature(enable = "avx512f")]
4694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4695#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4697pub const fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4698    unsafe { simd_fma(simd_neg(a), b, c) }
4699}
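
// `fnmadd` negates the product, not `c`: each lane computes `-(a * b) + c`.
// Negating `a` before the fused multiply-add is equivalent because sign
// negation is exact. A scalar sketch of the identity, using `f32::mul_add`
// as the scalar fused analogue (illustrative only):
//
//     let (a, b, c) = (2.0f32, 3.0f32, 1.0f32);
//     assert_eq!((-a).mul_add(b, c), c - a * b); // both are -5.0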
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4708#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4709pub const fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4710    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
4711}
4712
4713/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4714///
4715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4716#[inline]
4717#[target_feature(enable = "avx512f")]
4718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4719#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4720#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4721pub const fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4722    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
4723}
4724
4725/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4726///
4727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4728#[inline]
4729#[target_feature(enable = "avx512f")]
4730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4731#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4732#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4733pub const fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4734    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
4735}
4736
4737/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4738///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
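///
/// # Example
///
/// A minimal sketch of the per-lane arithmetic (marked `ignore`: it assumes a
/// CPU with AVX-512F and a caller compiled with
/// `#[target_feature(enable = "avx512f")]`):
///
/// ```ignore
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(1.0);
/// // Each lane computes -(a * b) + c = -(2.0 * 3.0) + 1.0 = -5.0.
/// let r = _mm512_fnmadd_pd(a, b, c);
/// ```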
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(simd_neg(a), b, c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
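///
/// # Example
///
/// A minimal sketch of the writemask behavior (marked `ignore`; same AVX-512F
/// assumptions as the `_mm512_fnmadd_pd` example above):
///
/// ```ignore
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(1.0);
/// // Lanes 0..4 (mask bits set) hold -(2.0 * 3.0) + 1.0 = -5.0;
/// // lanes 4..8 (mask bits clear) keep the corresponding lane of `a`, 2.0.
/// let r = _mm512_mask_fnmadd_pd(a, 0b0000_1111, b, c);
/// ```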
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
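///
/// # Example
///
/// A minimal sketch of the zeromask behavior (marked `ignore`; same AVX-512F
/// assumptions as the `_mm512_fnmadd_pd` example above):
///
/// ```ignore
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(1.0);
/// // Lanes 0..4 (mask bits set) hold -5.0; lanes 4..8 are zeroed.
/// let r = _mm512_maskz_fnmadd_pd(0b0000_1111, a, b, c);
/// ```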
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
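///
/// # Example
///
/// A minimal sketch of the `mask3` behavior, where unselected lanes come from
/// `c` rather than `a` (marked `ignore`; same AVX-512F assumptions as above):
///
/// ```ignore
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(1.0);
/// // Lanes 0..4 (mask bits set) hold -5.0;
/// // lanes 4..8 (mask bits clear) keep the corresponding lane of `c`, 1.0.
/// let r = _mm512_mask3_fnmadd_pd(a, b, c, 0b0000_1111);
/// ```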
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
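///
/// # Example
///
/// A minimal sketch of the per-lane arithmetic (marked `ignore`: it assumes a
/// CPU with AVX-512F and a caller compiled with
/// `#[target_feature(enable = "avx512f")]`):
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Each lane computes -(a * b) - c = -(2.0 * 3.0) - 1.0 = -7.0.
/// let r = _mm512_fnmsub_ps(a, b, c);
/// ```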
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
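///
/// # Example
///
/// A minimal sketch (marked `ignore`; assumes a CPU with AVX-512F and a caller
/// compiled with `#[target_feature(enable = "avx512f")]`):
///
/// ```ignore
/// let a = _mm512_set1_ps(3.0);
/// // Each lane holds an approximation of 1.0 / 3.0. The result is not exact:
/// // it is only guaranteed to be within a relative error of 2^-14.
/// let r = _mm512_rcp14_ps(a);
/// ```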
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
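///
/// # Example
///
/// A minimal sketch (marked `ignore`; assumes a CPU with AVX-512F and a caller
/// compiled with `#[target_feature(enable = "avx512f")]`):
///
/// ```ignore
/// let a = _mm512_set1_ps(4.0);
/// // Each lane approximates 1.0 / sqrt(4.0) = 0.5, to within a relative
/// // error of 2^-14.
/// let r = _mm512_rsqrt14_ps(a);
/// ```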
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
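///
/// # Example
///
/// A minimal sketch (marked `ignore`; assumes a CPU with AVX-512F and a caller
/// compiled with `#[target_feature(enable = "avx512f")]`):
///
/// ```ignore
/// let a = _mm512_set1_ps(8.0);
/// // getexp extracts floor(log2(|x|)) as a float: for 8.0 = 2^3 each lane
/// // holds 3.0.
/// let r = _mm512_getexp_ps(a);
/// ```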
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
    unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5649    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5650}
5651
5652/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5653///
5654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5655#[inline]
5656#[target_feature(enable = "avx512f,avx512vl")]
5657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5658#[cfg_attr(test, assert_instr(vgetexpps))]
5659pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5660    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5661}
5662
5663/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5664///
5665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5666#[inline]
5667#[target_feature(enable = "avx512f,avx512vl")]
5668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5669#[cfg_attr(test, assert_instr(vgetexpps))]
5670pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5671    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5672}
5673
5674/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5675///
5676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5677#[inline]
5678#[target_feature(enable = "avx512f,avx512vl")]
5679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5680#[cfg_attr(test, assert_instr(vgetexpps))]
5681pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5682    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5683}
5684
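// An illustrative sketch (hypothetical helper, not part of the test suite):
// getexp extracts the unbiased exponent as a float, i.e. floor(log2(|x|)),
// so every lane of getexp(10.0) is 3.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn getexp_ps_sketch() {
    let a = _mm512_set1_ps(10.0);
    let r = _mm512_getexp_ps(a);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    // 2^3 <= 10.0 < 2^4, so the reported exponent is 3.0 in every lane.
    assert!(out.iter().all(|&x| x == 3.0));
}
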
/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
    unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

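// A sketch of the zeromask variant (hypothetical helper, illustration only):
// lanes whose mask bit is clear are zeroed rather than copied from a source
// vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn maskz_getexp_pd_sketch() {
    let a = _mm512_set1_pd(32.0);
    // Mask 0b00001111: lanes 0..4 get floor(log2(32.0)) = 5.0, lanes 4..8 are zeroed.
    let r = _mm512_maskz_getexp_pd(0b00001111, a);
    let mut out = [0.0f64; 8];
    unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [5.0, 5.0, 5.0, 5.0, 0.0, 0.0, 0.0, 0.0]);
}
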
/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let r = vrndscaleps(
            a,
            IMM8,
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_f32x8();
        let r = vrndscaleps256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let src = src.as_f32x4();
        let r = vrndscaleps128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
        transmute(r)
    }
}

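// A worked sketch of the IMM8 encoding (hypothetical helper, illustration
// only): bits 7:4 give the number of fraction bits M to keep and bits 2:0 the
// rounding mode, so the result is round(x * 2^M) / 2^M. With IMM8 = 0x10
// (M = 1, round to nearest) each lane of 1.26 becomes round(2.52) / 2 = 1.5.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn roundscale_ps_sketch() {
    let a = _mm512_set1_ps(1.26);
    let r = _mm512_roundscale_ps::<0x10>(a);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    assert!(out.iter().all(|&x| x == 1.5));
}
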
/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x8();
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x8();
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
#[rustc_legacy_const_generics(1)]
pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x4();
        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x4();
        let src = src.as_f64x4();
        let r = vrndscalepd256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x4();
        let r = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 16))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let src = src.as_f64x2();
        let r = vrndscalepd128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let r = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
        transmute(r)
    }
}

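// Sketch of the writemask form (hypothetical helper, illustration only): with
// IMM8 = 0 every element is rounded to an integer (M = 0, round to nearest,
// ties to even), and lanes with a clear mask bit keep the `src` value.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn mask_roundscale_pd_sketch() {
    let src = _mm512_set1_pd(-9.0);
    let a = _mm512_set1_pd(2.5);
    // Mask 0b00000001: only lane 0 is rounded; 2.5 rounds to even, giving 2.0.
    let r = _mm512_mask_roundscale_pd::<0>(src, 0b00000001, a);
    let mut out = [0.0f64; 8];
    unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out[0], 2.0);
    assert!(out[1..].iter().all(|&x| x == -9.0));
}
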
/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
    unsafe {
        transmute(vscalefps256(
            a.as_f32x8(),
            b.as_f32x8(),
            f32x8::ZERO,
            0b11111111,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vscalefps128(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            0b00001111,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
}

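// Sketch (hypothetical helper, not part of the test suite): scalef computes
// a * 2^floor(b) per lane, so scalef(3.0, 2.5) = 3.0 * 2^2 = 12.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn scalef_ps_sketch() {
    let a = _mm512_set1_ps(3.0);
    let b = _mm512_set1_ps(2.5);
    let r = _mm512_scalef_ps(a, b);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    assert!(out.iter().all(|&x| x == 12.0));
}
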
/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        transmute(vscalefpd256(
            a.as_f64x4(),
            b.as_f64x4(),
            f64x4::ZERO,
            0b00001111,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vscalefpd128(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b00000011,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
}

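// Sketch (hypothetical helper, illustration only): a negative exponent in `b`
// scales down, and because the scale is an exact power of two the result is
// computed without rounding error for in-range values.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn scalef_pd_sketch() {
    let a = _mm_set1_pd(7.0);
    let b = _mm_set1_pd(-3.0);
    // 7.0 * 2^-3 = 0.875, exactly.
    let r = _mm_scalef_pd(a, b);
    let mut out = [0.0f64; 2];
    unsafe { _mm_storeu_pd(out.as_mut_ptr(), r) };
    assert!(out.iter().all(|&x| x == 0.875));
}
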
/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let c = c.as_i32x8();
        let r = vfixupimmps256(a, b, c, IMM8, 0b11111111);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
    a: __m256,
    k: __mmask8,
    b: __m256,
    c: __m256i,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let c = c.as_i32x8();
        let r = vfixupimmps256(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
    k: __mmask8,
    a: __m256,
    b: __m256,
    c: __m256i,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let c = c.as_i32x8();
        let r = vfixupimmpsz256(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmps128(a, b, c, IMM8, 0b00001111);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128i,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmps128(a, b, c, IMM8, k);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128i,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let c = c.as_i32x4();
        let r = vfixupimmpsz128(a, b, c, IMM8, k);
        transmute(r)
    }
}

6714/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6715///
6716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6717#[inline]
6718#[target_feature(enable = "avx512f")]
6719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6720#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6721#[rustc_legacy_const_generics(3)]
6722pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6723    unsafe {
6724        static_assert_uimm_bits!(IMM8, 8);
6725        let a = a.as_f64x8();
6726        let b = b.as_f64x8();
6727        let c = c.as_i64x8();
6728        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6729        transmute(r)
6730    }
6731}
6732
6733/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6734///
6735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6736#[inline]
6737#[target_feature(enable = "avx512f")]
6738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6739#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6740#[rustc_legacy_const_generics(4)]
6741pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6742    a: __m512d,
6743    k: __mmask8,
6744    b: __m512d,
6745    c: __m512i,
6746) -> __m512d {
6747    unsafe {
6748        static_assert_uimm_bits!(IMM8, 8);
6749        let a = a.as_f64x8();
6750        let b = b.as_f64x8();
6751        let c = c.as_i64x8();
6752        let r = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6753        transmute(r)
6754    }
6755}
6756
6757/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6758///
6759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6760#[inline]
6761#[target_feature(enable = "avx512f")]
6762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6763#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6764#[rustc_legacy_const_generics(4)]
6765pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6766    k: __mmask8,
6767    a: __m512d,
6768    b: __m512d,
6769    c: __m512i,
6770) -> __m512d {
6771    unsafe {
6772        static_assert_uimm_bits!(IMM8, 8);
6773        let a = a.as_f64x8();
6774        let b = b.as_f64x8();
6775        let c = c.as_i64x8();
6776        let r = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6777        transmute(r)
6778    }
6779}
6780
6781/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6782///
6783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6784#[inline]
6785#[target_feature(enable = "avx512f,avx512vl")]
6786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6787#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6788#[rustc_legacy_const_generics(3)]
6789pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6790    unsafe {
6791        static_assert_uimm_bits!(IMM8, 8);
6792        let a = a.as_f64x4();
6793        let b = b.as_f64x4();
6794        let c = c.as_i64x4();
6795        let r = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6796        transmute(r)
6797    }
6798}
6799
6800/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6801///
6802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6803#[inline]
6804#[target_feature(enable = "avx512f,avx512vl")]
6805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6806#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6807#[rustc_legacy_const_generics(4)]
6808pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6809    a: __m256d,
6810    k: __mmask8,
6811    b: __m256d,
6812    c: __m256i,
6813) -> __m256d {
6814    unsafe {
6815        static_assert_uimm_bits!(IMM8, 8);
6816        let a = a.as_f64x4();
6817        let b = b.as_f64x4();
6818        let c = c.as_i64x4();
6819        let r = vfixupimmpd256(a, b, c, IMM8, k);
6820        transmute(r)
6821    }
6822}
6823
6824/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6825///
6826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6827#[inline]
6828#[target_feature(enable = "avx512f,avx512vl")]
6829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6830#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6831#[rustc_legacy_const_generics(4)]
6832pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6833    k: __mmask8,
6834    a: __m256d,
6835    b: __m256d,
6836    c: __m256i,
6837) -> __m256d {
6838    unsafe {
6839        static_assert_uimm_bits!(IMM8, 8);
6840        let a = a.as_f64x4();
6841        let b = b.as_f64x4();
6842        let c = c.as_i64x4();
6843        let r = vfixupimmpdz256(a, b, c, IMM8, k);
6844        transmute(r)
6845    }
6846}
6847
6848/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6849///
6850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6851#[inline]
6852#[target_feature(enable = "avx512f,avx512vl")]
6853#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6854#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6855#[rustc_legacy_const_generics(3)]
6856pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6857    unsafe {
6858        static_assert_uimm_bits!(IMM8, 8);
6859        let a = a.as_f64x2();
6860        let b = b.as_f64x2();
6861        let c = c.as_i64x2();
6862        let r = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6863        transmute(r)
6864    }
6865}
6866
6867/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6868///
6869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6870#[inline]
6871#[target_feature(enable = "avx512f,avx512vl")]
6872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6873#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6874#[rustc_legacy_const_generics(4)]
6875pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6876    a: __m128d,
6877    k: __mmask8,
6878    b: __m128d,
6879    c: __m128i,
6880) -> __m128d {
6881    unsafe {
6882        static_assert_uimm_bits!(IMM8, 8);
6883        let a = a.as_f64x2();
6884        let b = b.as_f64x2();
6885        let c = c.as_i64x2();
6886        let r = vfixupimmpd128(a, b, c, IMM8, k);
6887        transmute(r)
6888    }
6889}
6890
6891/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6892///
6893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6894#[inline]
6895#[target_feature(enable = "avx512f,avx512vl")]
6896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6897#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6898#[rustc_legacy_const_generics(4)]
6899pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6900    k: __mmask8,
6901    a: __m128d,
6902    b: __m128d,
6903    c: __m128i,
6904) -> __m128d {
6905    unsafe {
6906        static_assert_uimm_bits!(IMM8, 8);
6907        let a = a.as_f64x2();
6908        let b = b.as_f64x2();
6909        let c = c.as_i64x2();
6910        let r = vfixupimmpdz128(a, b, c, IMM8, k);
6911        transmute(r)
6912    }
6913}
6914
/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
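///
/// A minimal usage sketch, not from Intel's docs (assumes a CPU with
/// `avx512f`; the truth-table value is derived by applying the desired
/// function to the index patterns a = 0xF0, b = 0xCC, c = 0xAA):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Safety: only run after verifying `is_x86_feature_detected!("avx512f")`.
/// unsafe {
///     let a = _mm512_set1_epi32(0b1100);
///     let b = _mm512_set1_epi32(0b1010);
///     let c = _mm512_set1_epi32(0b0001);
///     // (a & b) | c: its truth table is (0xF0 & 0xCC) | 0xAA = 0xEA.
///     let r = _mm512_ternarylogic_epi32::<0xEA>(a, b, c);
///     let expected = _mm512_set1_epi32(0b1001); // (1100 & 1010) | 0001
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, expected), 0xFFFF);
/// }
/// ```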
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        let c = c.as_i32x16();
        let r = vpternlogd(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
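///
/// A minimal sketch of the writemask behavior, not from Intel's docs
/// (assumes a CPU with `avx512f`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Safety: only run after verifying `is_x86_feature_detected!("avx512f")`.
/// unsafe {
///     let src = _mm512_set1_epi32(-1);
///     let a = _mm512_set1_epi32(123);
///     let b = _mm512_set1_epi32(456);
///     // IMM8 = 0 clears every bit, but only in the eight lanes selected by
///     // the mask; the remaining lanes keep their value from `src`.
///     let r = _mm512_mask_ternarylogic_epi32::<0>(src, 0x00FF, a, b);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_setzero_si512()), 0x00FF);
/// }
/// ```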
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i32x16();
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        let r = vpternlogd(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        let c = c.as_i32x16();
        let r = vpternlogd(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let c = c.as_i32x8();
        let r = vpternlogd256(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i32x8();
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let r = vpternlogd256(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let c = c.as_i32x8();
        let r = vpternlogd256(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let c = c.as_i32x4();
        let r = vpternlogd128(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i32x4();
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let r = vpternlogd128(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
    c: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let c = c.as_i32x4();
        let r = vpternlogd128(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
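///
/// A minimal usage sketch, not from Intel's docs (assumes a CPU with
/// `avx512f`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Safety: only run after verifying `is_x86_feature_detected!("avx512f")`.
/// unsafe {
///     let a = _mm512_set1_epi64(0b1100);
///     let b = _mm512_set1_epi64(0b1010);
///     let c = _mm512_set1_epi64(0b0110);
///     // Three-way XOR (a ^ b ^ c): its truth table is 0xF0 ^ 0xCC ^ 0xAA = 0x96.
///     let r = _mm512_ternarylogic_epi64::<0x96>(a, b, c);
///     // 1100 ^ 1010 ^ 0110 == 0000 in every lane
///     assert_eq!(_mm512_cmpeq_epi64_mask(r, _mm512_setzero_si512()), 0xFF);
/// }
/// ```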
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let c = c.as_i64x8();
        let r = vpternlogq(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i64x8();
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let r = vpternlogq(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
    c: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let c = c.as_i64x8();
        let r = vpternlogq(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let c = c.as_i64x4();
        let r = vpternlogq256(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i64x4();
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let r = vpternlogq256(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
    c: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let c = c.as_i64x4();
        let r = vpternlogq256(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let c = c.as_i64x2();
        let r = vpternlogq128(a, b, c, IMM8);
        transmute(r)
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let src = src.as_i64x2();
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let r = vpternlogq128(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
    }
}

/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
    c: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let c = c.as_i64x2();
        let r = vpternlogq128(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
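///
/// A minimal usage sketch, not from Intel's docs (assumes a CPU with
/// `avx512f`; `_MM_MANT_NORM_1_2` and `_MM_MANT_SIGN_src` are the constants
/// listed above):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Safety: only run after verifying `is_x86_feature_detected!("avx512f")`.
/// unsafe {
///     let a = _mm512_set1_ps(12.0);
///     // 12.0 = 1.5 * 2^3, so normalizing the mantissa to [1, 2) yields 1.5
///     // in every lane.
///     let r = _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a);
///     let m = _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(r, _mm512_set1_ps(1.5));
///     assert_eq!(m, 0xFFFF);
/// }
/// ```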
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x16();
        let zero = f32x16::ZERO;
        let r = vgetmantps(
            a,
            SIGN << 2 | NORM,
            zero,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x16();
        let r = vgetmantps(
            a,
            SIGN << 2 | NORM,
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x8();
        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm256_mask_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m256,
    k: __mmask8,
    a: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x8();
        let src = src.as_f32x8();
        let r = vgetmantps256(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm256_maskz_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m256,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x8();
        let r = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_mask_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128,
    k: __mmask8,
    a: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let src = src.as_f32x4();
        let r = vgetmantps128(a, SIGN << 2 | NORM, src, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_maskz_getmant_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let r = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
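///
/// A minimal usage sketch, not from Intel's docs (assumes a CPU with
/// `avx512f`; `_MM_MANT_NORM_p5_1` and `_MM_MANT_SIGN_src` are the constants
/// listed above):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Safety: only run after verifying `is_x86_feature_detected!("avx512f")`.
/// unsafe {
///     let a = _mm512_set1_pd(12.0);
///     // 12.0 = 0.75 * 2^4, so normalizing the mantissa to [0.5, 1) yields
///     // 0.75 in every lane.
///     let r = _mm512_getmant_pd::<_MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src>(a);
///     let m = _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(r, _mm512_set1_pd(0.75));
///     assert_eq!(m, 0xFF);
/// }
/// ```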
7633#[inline]
7634#[target_feature(enable = "avx512f")]
7635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7636#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7637#[rustc_legacy_const_generics(1, 2)]
7638pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7639    a: __m512d,
7640) -> __m512d {
7641    unsafe {
7642        static_assert_uimm_bits!(NORM, 4);
7643        static_assert_uimm_bits!(SIGN, 2);
7644        let a = a.as_f64x8();
7645        let zero = f64x8::ZERO;
7646        let r = vgetmantpd(
7647            a,
7648            SIGN << 2 | NORM,
7649            zero,
7650            0b11111111,
7651            _MM_FROUND_CUR_DIRECTION,
7652        );
7653        transmute(r)
7654    }
7655}
7656
7657/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7658/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7659///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7660///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7661///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7662///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7663/// The sign is determined by sc which can take the following values:\
7664///    _MM_MANT_SIGN_src     // sign = sign(src)\
7665///    _MM_MANT_SIGN_zero    // sign = 0\
7666///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7667///
7668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7669#[inline]
7670#[target_feature(enable = "avx512f")]
7671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7672#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7673#[rustc_legacy_const_generics(3, 4)]
7674pub fn _mm512_mask_getmant_pd<
7675    const NORM: _MM_MANTISSA_NORM_ENUM,
7676    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7677>(
7678    src: __m512d,
7679    k: __mmask8,
7680    a: __m512d,
7681) -> __m512d {
7682    unsafe {
7683        static_assert_uimm_bits!(NORM, 4);
7684        static_assert_uimm_bits!(SIGN, 2);
7685        let a = a.as_f64x8();
7686        let src = src.as_f64x8();
7687        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
7688        transmute(r)
7689    }
7690}
7691
7692/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7693/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7694///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7695///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7696///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7697///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7698/// The sign is determined by sc which can take the following values:\
7699///    _MM_MANT_SIGN_src     // sign = sign(src)\
7700///    _MM_MANT_SIGN_zero    // sign = 0\
7701///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7702///
7703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7704#[inline]
7705#[target_feature(enable = "avx512f")]
7706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7707#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7708#[rustc_legacy_const_generics(2, 3)]
7709pub fn _mm512_maskz_getmant_pd<
7710    const NORM: _MM_MANTISSA_NORM_ENUM,
7711    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7712>(
7713    k: __mmask8,
7714    a: __m512d,
7715) -> __m512d {
7716    unsafe {
7717        static_assert_uimm_bits!(NORM, 4);
7718        static_assert_uimm_bits!(SIGN, 2);
7719        let a = a.as_f64x8();
7720        let r = vgetmantpd(
7721            a,
7722            SIGN << 2 | NORM,
7723            f64x8::ZERO,
7724            k,
7725            _MM_FROUND_CUR_DIRECTION,
7726        );
7727        transmute(r)
7728    }
7729}
7730
7731/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7732/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7733///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7734///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7735///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7736///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7737/// The sign is determined by sc which can take the following values:\
7738///    _MM_MANT_SIGN_src     // sign = sign(src)\
7739///    _MM_MANT_SIGN_zero    // sign = 0\
7740///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7741///
7742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
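///
/// A minimal usage sketch (illustrative; this form requires both `avx512f`
/// and `avx512vl` at runtime). Under `_MM_MANT_NORM_1_2`, `24.0`
/// (`1.5 * 2^4`) normalizes to `1.5`, and `_MM_MANT_SIGN_src` preserves the
/// source sign:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: both required target features were detected at runtime.
///         unsafe {
///             let a = _mm256_set1_pd(24.0);
///             let r = _mm256_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a);
///         }
///     }
/// }
/// ```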
7743#[inline]
7744#[target_feature(enable = "avx512f,avx512vl")]
7745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7746#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7747#[rustc_legacy_const_generics(1, 2)]
7748pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7749    a: __m256d,
7750) -> __m256d {
7751    unsafe {
7752        static_assert_uimm_bits!(NORM, 4);
7753        static_assert_uimm_bits!(SIGN, 2);
7754        let a = a.as_f64x4();
7755        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
7756        transmute(r)
7757    }
7758}
7759
7760/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7761/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7762///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7763///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7764///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7765///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7766/// The sign is determined by sc which can take the following values:\
7767///    _MM_MANT_SIGN_src     // sign = sign(src)\
7768///    _MM_MANT_SIGN_zero    // sign = 0\
7769///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7770///
7771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7772#[inline]
7773#[target_feature(enable = "avx512f,avx512vl")]
7774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7775#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7776#[rustc_legacy_const_generics(3, 4)]
7777pub fn _mm256_mask_getmant_pd<
7778    const NORM: _MM_MANTISSA_NORM_ENUM,
7779    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7780>(
7781    src: __m256d,
7782    k: __mmask8,
7783    a: __m256d,
7784) -> __m256d {
7785    unsafe {
7786        static_assert_uimm_bits!(NORM, 4);
7787        static_assert_uimm_bits!(SIGN, 2);
7788        let a = a.as_f64x4();
7789        let src = src.as_f64x4();
7790        let r = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
7791        transmute(r)
7792    }
7793}
7794
7795/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7796/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7797///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7798///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7799///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7800///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7801/// The sign is determined by sc which can take the following values:\
7802///    _MM_MANT_SIGN_src     // sign = sign(src)\
7803///    _MM_MANT_SIGN_zero    // sign = 0\
7804///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7805///
7806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7807#[inline]
7808#[target_feature(enable = "avx512f,avx512vl")]
7809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7810#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7811#[rustc_legacy_const_generics(2, 3)]
7812pub fn _mm256_maskz_getmant_pd<
7813    const NORM: _MM_MANTISSA_NORM_ENUM,
7814    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7815>(
7816    k: __mmask8,
7817    a: __m256d,
7818) -> __m256d {
7819    unsafe {
7820        static_assert_uimm_bits!(NORM, 4);
7821        static_assert_uimm_bits!(SIGN, 2);
7822        let a = a.as_f64x4();
7823        let r = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
7824        transmute(r)
7825    }
7826}
7827
7828/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7829/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7830///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7831///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7832///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7833///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7834/// The sign is determined by sc which can take the following values:\
7835///    _MM_MANT_SIGN_src     // sign = sign(src)\
7836///    _MM_MANT_SIGN_zero    // sign = 0\
7837///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7838///
7839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7840#[inline]
7841#[target_feature(enable = "avx512f,avx512vl")]
7842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7843#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7844#[rustc_legacy_const_generics(1, 2)]
7845pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7846    a: __m128d,
7847) -> __m128d {
7848    unsafe {
7849        static_assert_uimm_bits!(NORM, 4);
7850        static_assert_uimm_bits!(SIGN, 2);
7851        let a = a.as_f64x2();
7852        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
7853        transmute(r)
7854    }
7855}
7856
7857/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7858/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7859///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7860///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7861///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7862///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7863/// The sign is determined by sc which can take the following values:\
7864///    _MM_MANT_SIGN_src     // sign = sign(src)\
7865///    _MM_MANT_SIGN_zero    // sign = 0\
7866///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7867///
7868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7869#[inline]
7870#[target_feature(enable = "avx512f,avx512vl")]
7871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7872#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7873#[rustc_legacy_const_generics(3, 4)]
7874pub fn _mm_mask_getmant_pd<
7875    const NORM: _MM_MANTISSA_NORM_ENUM,
7876    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7877>(
7878    src: __m128d,
7879    k: __mmask8,
7880    a: __m128d,
7881) -> __m128d {
7882    unsafe {
7883        static_assert_uimm_bits!(NORM, 4);
7884        static_assert_uimm_bits!(SIGN, 2);
7885        let a = a.as_f64x2();
7886        let src = src.as_f64x2();
7887        let r = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
7888        transmute(r)
7889    }
7890}
7891
7892/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7893/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7894///    _MM_MANT_NORM_1_2     // interval [1, 2)\
7895///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
7896///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
7897///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7898/// The sign is determined by sc which can take the following values:\
7899///    _MM_MANT_SIGN_src     // sign = sign(src)\
7900///    _MM_MANT_SIGN_zero    // sign = 0\
7901///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
7902///
7903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
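///
/// A minimal usage sketch (illustrative) of the zeromask behavior on the
/// 128-bit form: lane 0 is normalized, lane 1 is zeroed because its mask bit
/// is clear:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: both required target features were detected at runtime.
///         unsafe {
///             let a = _mm_set1_pd(24.0);
///             // Only mask bit 0 is set, so lane 1 of the result is 0.0.
///             let r = _mm_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(0b01, a);
///         }
///     }
/// }
/// ```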
7904#[inline]
7905#[target_feature(enable = "avx512f,avx512vl")]
7906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7907#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7908#[rustc_legacy_const_generics(2, 3)]
7909pub fn _mm_maskz_getmant_pd<
7910    const NORM: _MM_MANTISSA_NORM_ENUM,
7911    const SIGN: _MM_MANTISSA_SIGN_ENUM,
7912>(
7913    k: __mmask8,
7914    a: __m128d,
7915) -> __m128d {
7916    unsafe {
7917        static_assert_uimm_bits!(NORM, 4);
7918        static_assert_uimm_bits!(SIGN, 2);
7919        let a = a.as_f64x2();
7920        let r = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
7921        transmute(r)
7922    }
7923}
7924
7925/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7926///
7927/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7928/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7929/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7930/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7931/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7932/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7933///
7934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
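///
/// A minimal usage sketch (illustrative), selecting an explicit rounding mode
/// instead of the current `MXCSR` setting:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_ps(1.0);
///             let b = _mm512_set1_ps(1e-9);
///             // Round toward negative infinity and suppress exceptions.
///             let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
///         }
///     }
/// }
/// ```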
7935#[inline]
7936#[target_feature(enable = "avx512f")]
7937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7938#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7939#[rustc_legacy_const_generics(2)]
7940pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7941    unsafe {
7942        static_assert_rounding!(ROUNDING);
7943        let a = a.as_f32x16();
7944        let b = b.as_f32x16();
7945        let r = vaddps(a, b, ROUNDING);
7946        transmute(r)
7947    }
7948}
7949
7950/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7951///
7952/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7953/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7954/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7955/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7956/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7957/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7958///
7959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7960#[inline]
7961#[target_feature(enable = "avx512f")]
7962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7963#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7964#[rustc_legacy_const_generics(4)]
7965pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7966    src: __m512,
7967    k: __mmask16,
7968    a: __m512,
7969    b: __m512,
7970) -> __m512 {
7971    unsafe {
7972        static_assert_rounding!(ROUNDING);
7973        let a = a.as_f32x16();
7974        let b = b.as_f32x16();
7975        let r = vaddps(a, b, ROUNDING);
7976        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7977    }
7978}
7979
7980/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(3)]
7995pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7996    k: __mmask16,
7997    a: __m512,
7998    b: __m512,
7999) -> __m512 {
8000    unsafe {
8001        static_assert_rounding!(ROUNDING);
8002        let a = a.as_f32x16();
8003        let b = b.as_f32x16();
8004        let r = vaddps(a, b, ROUNDING);
8005        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8006    }
8007}
8008
8009/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8010///
8011/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8012/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8013/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8014/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8015/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8016/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8017///
8018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
8019#[inline]
8020#[target_feature(enable = "avx512f")]
8021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8022#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8023#[rustc_legacy_const_generics(2)]
8024pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8025    unsafe {
8026        static_assert_rounding!(ROUNDING);
8027        let a = a.as_f64x8();
8028        let b = b.as_f64x8();
8029        let r = vaddpd(a, b, ROUNDING);
8030        transmute(r)
8031    }
8032}
8033
8034/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8035///
8036/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8037/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8038/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8039/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8040/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8041/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8042///
8043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
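///
/// A minimal usage sketch (illustrative) of the writemask behavior:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let src = _mm512_set1_pd(-1.0);
///             let a = _mm512_set1_pd(2.0);
///             let b = _mm512_set1_pd(3.0);
///             // Lanes 0, 2, 4, 6 become 5.0; the other lanes keep -1.0 from `src`.
///             let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///                 src, 0b0101_0101, a, b,
///             );
///         }
///     }
/// }
/// ```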
8044#[inline]
8045#[target_feature(enable = "avx512f")]
8046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8047#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8048#[rustc_legacy_const_generics(4)]
8049pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
8050    src: __m512d,
8051    k: __mmask8,
8052    a: __m512d,
8053    b: __m512d,
8054) -> __m512d {
8055    unsafe {
8056        static_assert_rounding!(ROUNDING);
8057        let a = a.as_f64x8();
8058        let b = b.as_f64x8();
8059        let r = vaddpd(a, b, ROUNDING);
8060        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8061    }
8062}
8063
8064/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8077#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(3)]
8079pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
8080    k: __mmask8,
8081    a: __m512d,
8082    b: __m512d,
8083) -> __m512d {
8084    unsafe {
8085        static_assert_rounding!(ROUNDING);
8086        let a = a.as_f64x8();
8087        let b = b.as_f64x8();
8088        let r = vaddpd(a, b, ROUNDING);
8089        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8090    }
8091}
8092
8093/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8094///
8095/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8096/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8097/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8098/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8099/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8100/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8101///
8102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
8103#[inline]
8104#[target_feature(enable = "avx512f")]
8105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8106#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8107#[rustc_legacy_const_generics(2)]
8108pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8109    unsafe {
8110        static_assert_rounding!(ROUNDING);
8111        let a = a.as_f32x16();
8112        let b = b.as_f32x16();
8113        let r = vsubps(a, b, ROUNDING);
8114        transmute(r)
8115    }
8116}
8117
8118/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8119///
8120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8126///
8127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
8128#[inline]
8129#[target_feature(enable = "avx512f")]
8130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8131#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8132#[rustc_legacy_const_generics(4)]
8133pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
8134    src: __m512,
8135    k: __mmask16,
8136    a: __m512,
8137    b: __m512,
8138) -> __m512 {
8139    unsafe {
8140        static_assert_rounding!(ROUNDING);
8141        let a = a.as_f32x16();
8142        let b = b.as_f32x16();
8143        let r = vsubps(a, b, ROUNDING);
8144        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8145    }
8146}
8147
8148/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
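///
/// A minimal usage sketch (illustrative) of the zeromask behavior, using the
/// current `MXCSR` rounding mode:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_ps(5.0);
///             let b = _mm512_set1_ps(3.0);
///             // The low eight lanes become 2.0; the high eight are zeroed.
///             let r = _mm512_maskz_sub_round_ps::<_MM_FROUND_CUR_DIRECTION>(0x00FF, a, b);
///         }
///     }
/// }
/// ```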
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8161#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(3)]
8163pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
8164    k: __mmask16,
8165    a: __m512,
8166    b: __m512,
8167) -> __m512 {
8168    unsafe {
8169        static_assert_rounding!(ROUNDING);
8170        let a = a.as_f32x16();
8171        let b = b.as_f32x16();
8172        let r = vsubps(a, b, ROUNDING);
8173        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8174    }
8175}
8176
8177/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8178///
8179/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8180/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8181/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8182/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8183/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8184/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8185///
8186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
8187#[inline]
8188#[target_feature(enable = "avx512f")]
8189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8190#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8191#[rustc_legacy_const_generics(2)]
8192pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8193    unsafe {
8194        static_assert_rounding!(ROUNDING);
8195        let a = a.as_f64x8();
8196        let b = b.as_f64x8();
8197        let r = vsubpd(a, b, ROUNDING);
8198        transmute(r)
8199    }
8200}
8201
8202/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8203///
8204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8210///
8211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
8212#[inline]
8213#[target_feature(enable = "avx512f")]
8214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8215#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8216#[rustc_legacy_const_generics(4)]
8217pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
8218    src: __m512d,
8219    k: __mmask8,
8220    a: __m512d,
8221    b: __m512d,
8222) -> __m512d {
8223    unsafe {
8224        static_assert_rounding!(ROUNDING);
8225        let a = a.as_f64x8();
8226        let b = b.as_f64x8();
8227        let r = vsubpd(a, b, ROUNDING);
8228        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8229    }
8230}
8231
8232/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8245#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(3)]
8247pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
8248    k: __mmask8,
8249    a: __m512d,
8250    b: __m512d,
8251) -> __m512d {
8252    unsafe {
8253        static_assert_rounding!(ROUNDING);
8254        let a = a.as_f64x8();
8255        let b = b.as_f64x8();
8256        let r = vsubpd(a, b, ROUNDING);
8257        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8258    }
8259}
8260
8261/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
8262///
8263/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8264/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8265/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8266/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8267/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8268/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8269///
8270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
8271#[inline]
8272#[target_feature(enable = "avx512f")]
8273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8274#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8275#[rustc_legacy_const_generics(2)]
8276pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8277    unsafe {
8278        static_assert_rounding!(ROUNDING);
8279        let a = a.as_f32x16();
8280        let b = b.as_f32x16();
8281        let r = vmulps(a, b, ROUNDING);
8282        transmute(r)
8283    }
8284}
8285
8286/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8287///
8288/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8289/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8290/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8291/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8292/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8293/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8294///
8295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8296#[inline]
8297#[target_feature(enable = "avx512f")]
8298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8299#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8300#[rustc_legacy_const_generics(4)]
8301pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8302    src: __m512,
8303    k: __mmask16,
8304    a: __m512,
8305    b: __m512,
8306) -> __m512 {
8307    unsafe {
8308        static_assert_rounding!(ROUNDING);
8309        let a = a.as_f32x16();
8310        let b = b.as_f32x16();
8311        let r = vmulps(a, b, ROUNDING);
8312        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8313    }
8314}
8315
8316/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8329#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(3)]
8331pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8332    k: __mmask16,
8333    a: __m512,
8334    b: __m512,
8335) -> __m512 {
8336    unsafe {
8337        static_assert_rounding!(ROUNDING);
8338        let a = a.as_f32x16();
8339        let b = b.as_f32x16();
8340        let r = vmulps(a, b, ROUNDING);
8341        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8342    }
8343}
8344
8345/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8346///
8347/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8348/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8349/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8350/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8351/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8352/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8353///
8354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
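///
/// A minimal usage sketch (illustrative), truncating the products toward zero:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_pd(1.5);
///             let b = _mm512_set1_pd(2.0);
///             // Truncate (round toward zero) and suppress exceptions.
///             let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
///         }
///     }
/// }
/// ```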
8355#[inline]
8356#[target_feature(enable = "avx512f")]
8357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8358#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8359#[rustc_legacy_const_generics(2)]
8360pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8361    unsafe {
8362        static_assert_rounding!(ROUNDING);
8363        let a = a.as_f64x8();
8364        let b = b.as_f64x8();
8365        let r = vmulpd(a, b, ROUNDING);
8366        transmute(r)
8367    }
8368}
8369
8370/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8371///
8372/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8373/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8374/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8375/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8376/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8377/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8378///
8379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8380#[inline]
8381#[target_feature(enable = "avx512f")]
8382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8383#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8384#[rustc_legacy_const_generics(4)]
8385pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8386    src: __m512d,
8387    k: __mmask8,
8388    a: __m512d,
8389    b: __m512d,
8390) -> __m512d {
8391    unsafe {
8392        static_assert_rounding!(ROUNDING);
8393        let a = a.as_f64x8();
8394        let b = b.as_f64x8();
8395        let r = vmulpd(a, b, ROUNDING);
8396        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8397    }
8398}
8399
8400/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8401///
8402/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8403/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8404/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8405/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8406/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8408///
8409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3936)
8410#[inline]
8411#[target_feature(enable = "avx512f")]
8412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8413#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8414#[rustc_legacy_const_generics(3)]
8415pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8416    k: __mmask8,
8417    a: __m512d,
8418    b: __m512d,
8419) -> __m512d {
8420    unsafe {
8421        static_assert_rounding!(ROUNDING);
8422        let a = a.as_f64x8();
8423        let b = b.as_f64x8();
8424        let r = vmulpd(a, b, ROUNDING);
8425        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8426    }
8427}
8428
8429/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8430///
8431/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8432/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8433/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8434/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8435/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8436/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8437///
8438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
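///
/// A minimal usage sketch (illustrative): `1.0 / 3.0` is inexact, so rounding
/// the quotient up versus down yields results that differ in the last bit:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_ps(1.0);
///             let b = _mm512_set1_ps(3.0);
///             let up = _mm512_div_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
///             let down = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
///         }
///     }
/// }
/// ```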
8439#[inline]
8440#[target_feature(enable = "avx512f")]
8441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8442#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8443#[rustc_legacy_const_generics(2)]
8444pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8445    unsafe {
8446        static_assert_rounding!(ROUNDING);
8447        let a = a.as_f32x16();
8448        let b = b.as_f32x16();
8449        let r = vdivps(a, b, ROUNDING);
8450        transmute(r)
8451    }
8452}
8453
8454/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8455///
8456/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8457/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8458/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8459/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8460/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8461/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8462///
8463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8464#[inline]
8465#[target_feature(enable = "avx512f")]
8466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8467#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8468#[rustc_legacy_const_generics(4)]
8469pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8470    src: __m512,
8471    k: __mmask16,
8472    a: __m512,
8473    b: __m512,
8474) -> __m512 {
8475    unsafe {
8476        static_assert_rounding!(ROUNDING);
8477        let a = a.as_f32x16();
8478        let b = b.as_f32x16();
8479        let r = vdivps(a, b, ROUNDING);
8480        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8481    }
8482}
8483
8484/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8485///
8486/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8487/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8488/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8489/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8490/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8491/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8492///
8493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8494#[inline]
8495#[target_feature(enable = "avx512f")]
8496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8497#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8498#[rustc_legacy_const_generics(3)]
8499pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8500    k: __mmask16,
8501    a: __m512,
8502    b: __m512,
8503) -> __m512 {
8504    unsafe {
8505        static_assert_rounding!(ROUNDING);
8506        let a = a.as_f32x16();
8507        let b = b.as_f32x16();
8508        let r = vdivps(a, b, ROUNDING);
8509        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8510    }
8511}
8512
8513/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8514///
8515/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8516/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8517/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8518/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8519/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8520/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8521///
8522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8523#[inline]
8524#[target_feature(enable = "avx512f")]
8525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8526#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8527#[rustc_legacy_const_generics(2)]
8528pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8529    unsafe {
8530        static_assert_rounding!(ROUNDING);
8531        let a = a.as_f64x8();
8532        let b = b.as_f64x8();
8533        let r = vdivpd(a, b, ROUNDING);
8534        transmute(r)
8535    }
8536}
8537
8538/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8539///
8540/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8541/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8542/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8543/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8544/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8545/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8546///
8547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8548#[inline]
8549#[target_feature(enable = "avx512f")]
8550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8551#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8552#[rustc_legacy_const_generics(4)]
8553pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8554    src: __m512d,
8555    k: __mmask8,
8556    a: __m512d,
8557    b: __m512d,
8558) -> __m512d {
8559    unsafe {
8560        static_assert_rounding!(ROUNDING);
8561        let a = a.as_f64x8();
8562        let b = b.as_f64x8();
8563        let r = vdivpd(a, b, ROUNDING);
8564        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8565    }
8566}
8567
8568/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8569///
8570/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8571/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8572/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8573/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8574/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8575/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8576///
8577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8578#[inline]
8579#[target_feature(enable = "avx512f")]
8580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8581#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8582#[rustc_legacy_const_generics(3)]
8583pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8584    k: __mmask8,
8585    a: __m512d,
8586    b: __m512d,
8587) -> __m512d {
8588    unsafe {
8589        static_assert_rounding!(ROUNDING);
8590        let a = a.as_f64x8();
8591        let b = b.as_f64x8();
8592        let r = vdivpd(a, b, ROUNDING);
8593        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8594    }
8595}
8596
8597/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8598///
8599/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8600/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8601/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8602/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8603/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8604/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8605///
8606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
8607#[inline]
8608#[target_feature(enable = "avx512f")]
8609#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8610#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8611#[rustc_legacy_const_generics(1)]
8612pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8613    unsafe {
8614        static_assert_rounding!(ROUNDING);
8615        let a = a.as_f32x16();
8616        let r = vsqrtps(a, ROUNDING);
8617        transmute(r)
8618    }
8619}
8620
8621/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8622///
8623/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8624/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8625/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8626/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8627/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8628/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8629///
8630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8631#[inline]
8632#[target_feature(enable = "avx512f")]
8633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8634#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8635#[rustc_legacy_const_generics(3)]
8636pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8637    src: __m512,
8638    k: __mmask16,
8639    a: __m512,
8640) -> __m512 {
8641    unsafe {
8642        static_assert_rounding!(ROUNDING);
8643        let a = a.as_f32x16();
8644        let r = vsqrtps(a, ROUNDING);
8645        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8646    }
8647}
8648
8649/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8650///
8651/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8652/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8653/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8654/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8655/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8656/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8657///
8658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8659#[inline]
8660#[target_feature(enable = "avx512f")]
8661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8662#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8663#[rustc_legacy_const_generics(2)]
8664pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8665    unsafe {
8666        static_assert_rounding!(ROUNDING);
8667        let a = a.as_f32x16();
8668        let r = vsqrtps(a, ROUNDING);
8669        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8670    }
8671}
8672
8673/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8674///
8675/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8676/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8677/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8678/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8679/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8680/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8681///
8682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
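///
/// A minimal usage sketch (illustrative): `sqrt(2.0)` computed with
/// round-to-nearest, suppressing exceptions:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_pd(2.0);
///             let r = _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///         }
///     }
/// }
/// ```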
8683#[inline]
8684#[target_feature(enable = "avx512f")]
8685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8686#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8687#[rustc_legacy_const_generics(1)]
8688pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8689    unsafe {
8690        static_assert_rounding!(ROUNDING);
8691        let a = a.as_f64x8();
8692        let r = vsqrtpd(a, ROUNDING);
8693        transmute(r)
8694    }
8695}
8696
8697/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8698///
8699/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8700/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8701/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8702/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8703/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8704/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8705///
8706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8707#[inline]
8708#[target_feature(enable = "avx512f")]
8709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8710#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8711#[rustc_legacy_const_generics(3)]
8712pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8713    src: __m512d,
8714    k: __mmask8,
8715    a: __m512d,
8716) -> __m512d {
8717    unsafe {
8718        static_assert_rounding!(ROUNDING);
8719        let a = a.as_f64x8();
8720        let r = vsqrtpd(a, ROUNDING);
8721        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8722    }
8723}
8724
8725/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8726///
8727/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8728/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8729/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8730/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8731/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8732/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8733///
8734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8735#[inline]
8736#[target_feature(enable = "avx512f")]
8737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8738#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8739#[rustc_legacy_const_generics(2)]
8740pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8741    unsafe {
8742        static_assert_rounding!(ROUNDING);
8743        let a = a.as_f64x8();
8744        let r = vsqrtpd(a, ROUNDING);
8745        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8746    }
8747}
8748
8749/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8750///
8751/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8752/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8753/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8754/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8755/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8756/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8757///
8758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
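///
/// A minimal usage sketch (illustrative): each lane computes
/// `2.0 * 3.0 + 1.0 = 7.0` with a single rounding step:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected at runtime.
///         unsafe {
///             let a = _mm512_set1_ps(2.0);
///             let b = _mm512_set1_ps(3.0);
///             let c = _mm512_set1_ps(1.0);
///             let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///         }
///     }
/// }
/// ```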
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, c, ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
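///
/// # Example
///
/// A minimal usage sketch (illustrative; the helper name and mask value are
/// hypothetical, and `avx512f` is assumed to be available):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Lanes 0..8 get fma(a, b, c) under the current MXCSR rounding mode;
/// // lanes 8..16 are zeroed by the mask.
/// unsafe fn fma_zero_high(a: __m512, b: __m512, c: __m512) -> __m512 {
///     _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_CUR_DIRECTION }>(0x00FF, a, b, c)
/// }
/// ```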
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
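///
/// # Example
///
/// A minimal usage sketch (illustrative; hypothetical helper, `avx512f` assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Even-numbered lanes get fma(a, b, c) with round-to-nearest; odd-numbered
/// // lanes keep their original value from `c`.
/// unsafe fn fma_even_lanes(a: __m512, b: __m512, c: __m512) -> __m512 {
///     _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///         a,
///         b,
///         c,
///         0b0101_0101_0101_0101,
///     )
/// }
/// ```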
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, c, ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
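///
/// # Example
///
/// A minimal usage sketch (illustrative; hypothetical helper, `avx512f` assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Fused (a * b) - c, truncated toward zero with exceptions suppressed.
/// unsafe fn fms_truncate(a: __m512, b: __m512, c: __m512) -> __m512 {
///     _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```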
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
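///
/// # Example
///
/// A minimal usage sketch (illustrative; hypothetical helper, `avx512f` assumed).
/// Even-indexed lanes compute `a * b - c` and odd-indexed lanes compute
/// `a * b + c`, the interleaving used by packed complex multiplication:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Alternating subtract (even lanes) / add (odd lanes) with one rounding step.
/// unsafe fn fmaddsub(a: __m512, b: __m512, c: __m512) -> __m512 {
///     _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```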
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddsubpsround(a, b, c, ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddsubpdround(a, b, c, ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
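///
/// # Example
///
/// A minimal usage sketch (illustrative; hypothetical helper, `avx512f` assumed).
/// This is the mirror image of `_mm512_fmaddsub_round_ps`: even-indexed lanes
/// compute `a * b + c` and odd-indexed lanes compute `a * b - c`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Alternating add (even lanes) / subtract (odd lanes) with one rounding step.
/// unsafe fn fmsubadd(a: __m512, b: __m512, c: __m512) -> __m512 {
///     _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```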
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
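///
/// # Example
///
/// A minimal usage sketch (illustrative; hypothetical helper, `avx512f` assumed):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// // Fused c - (a * b) with round-to-nearest and exceptions suppressed.
/// unsafe fn fnma(a: __m512, b: __m512, c: __m512) -> __m512 {
///     _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// }
/// ```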
9603#[inline]
9604#[target_feature(enable = "avx512f")]
9605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9606#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9607#[rustc_legacy_const_generics(3)]
9608pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9609    unsafe {
9610        static_assert_rounding!(ROUNDING);
9611        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9612    }
9613}
9614
9615/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9616///
9617/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9618/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9619/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9620/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9621/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9622/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9623///
9624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9625#[inline]
9626#[target_feature(enable = "avx512f")]
9627#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9628#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9629#[rustc_legacy_const_generics(4)]
9630pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9631    a: __m512,
9632    k: __mmask16,
9633    b: __m512,
9634    c: __m512,
9635) -> __m512 {
9636    unsafe {
9637        static_assert_rounding!(ROUNDING);
9638        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9639        simd_select_bitmask(k, r, a)
9640    }
9641}
9642
9643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9644///
9645/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9646/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9647/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9648/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9649/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9650/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9651///
9652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9653#[inline]
9654#[target_feature(enable = "avx512f")]
9655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9656#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9657#[rustc_legacy_const_generics(4)]
9658pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9659    k: __mmask16,
9660    a: __m512,
9661    b: __m512,
9662    c: __m512,
9663) -> __m512 {
9664    unsafe {
9665        static_assert_rounding!(ROUNDING);
9666        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9667        simd_select_bitmask(k, r, _mm512_setzero_ps())
9668    }
9669}
9670
9671/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9672///
9673/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9674/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9675/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9676/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9677/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9678/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9679///
9680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9681#[inline]
9682#[target_feature(enable = "avx512f")]
9683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9684#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9685#[rustc_legacy_const_generics(4)]
9686pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9687    a: __m512,
9688    b: __m512,
9689    c: __m512,
9690    k: __mmask16,
9691) -> __m512 {
9692    unsafe {
9693        static_assert_rounding!(ROUNDING);
9694        let r = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
9695        simd_select_bitmask(k, r, c)
9696    }
9697}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}
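
// Sketch (hypothetical helper): the three masked flavours above differ only in where
// unselected lanes come from. With the same mask, `mask` keeps `a`, `maskz` writes
// 0.0, and `mask3` keeps `c`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn fnmadd_round_pd_mask_flavours(
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> (__m512d, __m512d, __m512d) {
    const RN: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    let keep_a = _mm512_mask_fnmadd_round_pd::<RN>(a, 0b0000_1111, b, c);
    let zeroed = _mm512_maskz_fnmadd_round_pd::<RN>(0b0000_1111, a, b, c);
    let keep_c = _mm512_mask3_fnmadd_round_pd::<RN>(a, b, c, 0b0000_1111);
    (keep_a, zeroed, keep_c)
}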

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
    a: __m512,
    b: __m512,
    c: __m512,
    k: __mmask16,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}
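
// Sketch (hypothetical helper): fnmsub computes -(a * b) - c per lane. Here it is
// invoked with truncation toward zero and suppressed exceptions, one of the
// rounding combinations listed in the docs above.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn fnmsub_round_ps_sketch(a: __m512, b: __m512, c: __m512) -> __m512 {
    const RZ: i32 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
    _mm512_fnmsub_round_ps::<RZ>(a, b, c)
}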

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(3)]
pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    k: __mmask8,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
    c: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
    a: __m512d,
    b: __m512d,
    c: __m512d,
    k: __mmask8,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let r = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmaxps(a, b, SAE);
        transmute(r)
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_max_round_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}
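
// Sketch (hypothetical helper): unlike the arithmetic `_round` intrinsics, the
// max/min intrinsics take an SAE value rather than a rounding mode. SAE only
// controls whether floating-point exceptions are reported; it never changes the
// result. The accepted values here are _MM_FROUND_NO_EXC and
// _MM_FROUND_CUR_DIRECTION.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn max_round_ps_sketch(a: __m512, b: __m512) -> __m512 {
    _mm512_max_round_ps::<_MM_FROUND_NO_EXC>(a, b)
}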

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmaxpd(a, b, SAE);
        transmute(r)
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_max_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vminps(a, b, SAE);
        transmute(r)
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_min_round_ps<const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vminpd(a, b, SAE);
        transmute(r)
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_min_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vgetexpps(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vgetexpps(a, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}
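
// Sketch (hypothetical helper): getexp returns floor(log2(x)) of each lane as a
// float, per the docs above. For lanes holding 8.0 every result lane is 3.0; for
// 0.5 it would be -1.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn getexp_round_ps_sketch() -> __m512 {
    let a = _mm512_set1_ps(8.0);
    _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a)
}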

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vgetexppd(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vgetexppd(a, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let r = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vrndscaleps(a, IMM8, src, k, SAE);
        transmute(r)
    }
}

/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let r = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}
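
// Sketch (hypothetical helper): for roundscale, imm8's low bits select the rounding
// mode listed above, while imm8[7:4] gives the number of fraction bits to keep.
// IMM8 = 0x10 (one fraction bit, round to nearest) therefore rounds every lane to
// the nearest multiple of 0.5; IMM8 = 0 rounds to whole integers.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn roundscale_to_halves_sketch(a: __m512) -> __m512 {
    _mm512_roundscale_round_ps::<0x10, _MM_FROUND_NO_EXC>(a)
}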

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(1, 2)]
pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vrndscalepd(a, IMM8, src, k, SAE);
        transmute(r)
    }
}

/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let r = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let src = src.as_f32x16();
        let r = vscalefps(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}
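
// Sketch (hypothetical helper): scalef computes a * 2^floor(b) per lane, so a `b`
// vector of 3.0 multiplies every lane of `a` by 8. The rounding mode only matters
// when the scaled value is not exactly representable.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn scalef_by_eight_sketch(a: __m512) -> __m512 {
    const RN: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    let b = _mm512_set1_ps(3.0);
    _mm512_scalef_round_ps::<RN>(a, b)
}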

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let src = src.as_f64x8();
        let r = vscalefpd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    a: __m512,
    k: __mmask16,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmps(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}

/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
    k: __mmask16,
    a: __m512,
    b: __m512,
    c: __m512i,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let c = c.as_i32x16();
        let r = vfixupimmpsz(a, b, c, IMM8, k, SAE);
        transmute(r)
    }
}
10791
10792/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10793///
10794/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10796#[inline]
10797#[target_feature(enable = "avx512f")]
10798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10799#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10800#[rustc_legacy_const_generics(3, 4)]
10801pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10802    a: __m512d,
10803    b: __m512d,
10804    c: __m512i,
10805) -> __m512d {
10806    unsafe {
10807        static_assert_uimm_bits!(IMM8, 8);
10808        static_assert_mantissas_sae!(SAE);
10809        let a = a.as_f64x8();
10810        let b = b.as_f64x8();
10811        let c = c.as_i64x8();
10812        let r = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
10813        transmute(r)
10814    }
10815}
10816
10817/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10818///
10819/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10821#[inline]
10822#[target_feature(enable = "avx512f")]
10823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10824#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10825#[rustc_legacy_const_generics(4, 5)]
10826pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10827    a: __m512d,
10828    k: __mmask8,
10829    b: __m512d,
10830    c: __m512i,
10831) -> __m512d {
10832    unsafe {
10833        static_assert_uimm_bits!(IMM8, 8);
10834        static_assert_mantissas_sae!(SAE);
10835        let a = a.as_f64x8();
10836        let b = b.as_f64x8();
10837        let c = c.as_i64x8();
10838        let r = vfixupimmpd(a, b, c, IMM8, k, SAE);
10839        transmute(r)
10840    }
10841}
10842
10843/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10844///
10845/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10847#[inline]
10848#[target_feature(enable = "avx512f")]
10849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10850#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10851#[rustc_legacy_const_generics(4, 5)]
10852pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10853    k: __mmask8,
10854    a: __m512d,
10855    b: __m512d,
10856    c: __m512i,
10857) -> __m512d {
10858    unsafe {
10859        static_assert_uimm_bits!(IMM8, 8);
10860        static_assert_mantissas_sae!(SAE);
10861        let a = a.as_f64x8();
10862        let b = b.as_f64x8();
10863        let c = c.as_i64x8();
10864        let r = vfixupimmpdz(a, b, c, IMM8, k, SAE);
10865        transmute(r)
10866    }
10867}
10868
10869/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10870/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10871///    _MM_MANT_NORM_1_2     // interval [1, 2)\
10872///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
10873///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
10874///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10875/// The sign is determined by sc which can take the following values:\
10876///    _MM_MANT_SIGN_src     // sign = sign(src)\
10877///    _MM_MANT_SIGN_zero    // sign = 0\
10878///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
10879/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10880///
10881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
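///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // 10.0 = 1.25 * 2^3, so normalizing the mantissa into [1, 2)
///             // while keeping the source sign yields 1.25 in every lane.
///             let a = _mm512_set1_ps(10.0);
///             let r = _mm512_getmant_round_ps::<
///                 _MM_MANT_NORM_1_2,
///                 _MM_MANT_SIGN_SRC,
///                 _MM_FROUND_NO_EXC,
///             >(a);
///             let mut out = [0.0f32; 16];
///             _mm512_storeu_ps(out.as_mut_ptr(), r);
///             assert_eq!(out, [1.25; 16]);
///         }
///     }
/// }
/// ```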
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub fn _mm512_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm512_mask_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_f32x16();
        let r = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm512_maskz_getmant_round_ps<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let r = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(1, 2, 3)]
pub fn _mm512_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4, 5)]
pub fn _mm512_mask_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_f64x8();
        let r = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
    }
}

/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3, 4)]
pub fn _mm512_maskz_getmant_round_pd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
    const SAE: i32,
>(
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let r = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
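///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime and that `MXCSR` holds its default
/// round-to-nearest-even mode:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(1.5);
///             // Under round-to-nearest-even, 1.5 converts to 2.
///             let r = _mm512_cvtps_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             assert_eq!(out, [2; 16]);
///         }
///     }
/// }
/// ```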
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
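///
/// # Examples
///
/// A minimal, illustrative sketch of the writemask blend (inputs and mask are
/// made up); it assumes `avx512f` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let src = _mm512_set1_epi32(-1);
///             let a = _mm512_set1_ps(3.0);
///             // Mask bit i controls lane i: even lanes take the converted
///             // value, odd lanes keep the corresponding lane of `src`.
///             let r = _mm512_mask_cvtps_epi32(src, 0b0101_0101_0101_0101, a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             for (i, v) in out.iter().enumerate() {
///                 assert_eq!(*v, if i % 2 == 0 { 3 } else { -1 });
///             }
///         }
///     }
/// }
/// ```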
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe {
        let convert = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
    unsafe {
        let convert = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        let convert = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        let convert = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
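///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime and the default rounding mode:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(7.25);
///             // 7.25 rounds to the unsigned integer 7 in every lane.
///             let r = _mm512_cvtps_epu32(a);
///             let mut out = [0u32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr().cast(), r);
///             assert_eq!(out, [7; 16]);
///         }
///     }
/// }
/// ```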
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
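///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // Eight f32 lanes widen losslessly to eight f64 lanes.
///             let a = _mm256_set1_ps(0.5);
///             let r = _mm512_cvtps_pd(a);
///             let mut out = [0.0f64; 8];
///             _mm512_storeu_pd(out.as_mut_ptr(), r);
///             assert_eq!(out, [0.5; 8]);
///         }
///     }
/// }
/// ```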
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
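///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let v2 = _mm512_set1_ps(4.0);
///             // Only the low eight single-precision lanes participate.
///             let r = _mm512_cvtpslo_pd(v2);
///             let mut out = [0.0f64; 8];
///             _mm512_storeu_pd(out.as_mut_ptr(), r);
///             assert_eq!(out, [4.0; 8]);
///         }
///     }
/// }
/// ```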
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
    unsafe {
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
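///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // 2.5 is exactly representable in f32, so narrowing is exact.
///             let a = _mm512_set1_pd(2.5);
///             let r = _mm512_cvtpd_ps(a);
///             let mut out = [0.0f32; 8];
///             _mm256_storeu_ps(out.as_mut_ptr(), r);
///             assert_eq!(out, [2.5; 8]);
///         }
///     }
/// }
/// ```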
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
    unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            src.as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
    unsafe {
        let convert = _mm256_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
    unsafe {
        let convert = _mm256_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
    unsafe { vcvtpd2ps128(a.as_f64x2(), src.as_f32x4(), k).as_m128() }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
    unsafe {
        let convert = _mm_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
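///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime and the default round-to-nearest-even
/// mode:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_pd(-2.5);
///             // Round-to-nearest-even sends the tie -2.5 to -2.
///             let r = _mm512_cvtpd_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr().cast(), r);
///             assert_eq!(out, [-2; 8]);
///         }
///     }
/// }
/// ```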
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe {
        let convert = _mm256_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe {
        let convert = _mm256_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { vcvtpd2dq128(a.as_f64x2(), src.as_i32x4(), k).as_m128i() }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe {
        let convert = _mm_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
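///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime and the default rounding mode:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_pd(9.75);
///             // 9.75 rounds to the unsigned integer 10 in every lane.
///             let r = _mm512_cvtpd_epu32(a);
///             let mut out = [0u32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr().cast(), r);
///             assert_eq!(out, [10; 8]);
///         }
///     }
/// }
/// ```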
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            src.as_u32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
}

/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
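///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let v2 = _mm512_set1_pd(1.5);
///             let r = _mm512_cvtpd_pslo(v2);
///             let mut out = [0.0f32; 16];
///             _mm512_storeu_ps(out.as_mut_ptr(), r);
///             // Converted values land in the low half; the high half is zeroed.
///             assert_eq!(out[..8], [1.5; 8]);
///             assert_eq!(out[8..], [0.0; 8]);
///         }
///     }
/// }
/// ```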
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
    unsafe {
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        simd_shuffle!(
            r,
            f32x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}

/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
    unsafe {
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            _mm512_castps512_ps256(src).as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        simd_shuffle!(
            r,
            f32x8::ZERO,
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
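///
/// # Examples
///
/// A minimal, illustrative sketch (the input value is made up); it assumes
/// `avx512f` is detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm_set1_epi8(-7);
///             // Each 8-bit lane sign-extends to a 32-bit lane.
///             let r = _mm512_cvtepi8_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             assert_eq!(out, [-7; 16]);
///         }
///     }
/// }
/// ```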
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i8x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
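///
/// # Example
///
/// A minimal usage sketch, not part of the original documentation; marked
/// `ignore` because it needs an AVX-512 capable CPU at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the `avx512f` feature was just verified at runtime.
///     unsafe {
///         let a = _mm_set1_epi8(-7);
///         let r = _mm512_cvtepi8_epi64(a);
///         // Only the low 8 bytes of `a` participate; each i64 lane is -7.
///         assert_eq!(_mm512_reduce_add_epi64(r), -7 * 8);
///     }
/// }
/// ```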
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i8x16();
        let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
    }
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
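///
/// # Example
///
/// A minimal usage sketch, not part of the original documentation; marked
/// `ignore` because it needs an AVX-512 capable CPU at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the `avx512f` feature was just verified at runtime.
///     unsafe {
///         // 0xFF reinterpreted as an unsigned byte is 255.
///         let a = _mm_set1_epi8(-1);
///         let r = _mm512_cvtepu8_epi32(a);
///         // Zero extension yields 255 (not -1) in each of the 16 lanes.
///         assert_eq!(_mm512_reduce_add_epi32(r), 255 * 16);
///     }
/// }
/// ```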
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_u8x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}

/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
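///
/// # Example
///
/// A minimal usage sketch, not part of the original documentation; marked
/// `ignore` because it needs an AVX-512 capable CPU at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the `avx512f` feature was just verified at runtime.
///     unsafe {
///         let a = _mm_set1_epi8(-1); // 0xFF, i.e. 255 as an unsigned byte
///         let r = _mm512_cvtepu8_epi64(a);
///         // The low 8 bytes are zero-extended, so each i64 lane is 255.
///         assert_eq!(_mm512_reduce_add_epi64(r), 255 * 8);
///     }
/// }
/// ```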
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_u8x16();
        let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
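///
/// # Example
///
/// A minimal usage sketch, not part of the original documentation; marked
/// `ignore` because it needs an AVX-512 capable CPU at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the `avx512f` feature was just verified at runtime.
///     unsafe {
///         let a = _mm256_set1_epi16(-300);
///         let r = _mm512_cvtepi16_epi32(a);
///         // All sixteen 16-bit lanes are sign-extended to 32 bits.
///         assert_eq!(_mm512_reduce_add_epi32(r), -300 * 16);
///     }
/// }
/// ```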
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i16x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
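///
/// # Example
///
/// A minimal usage sketch, not part of the original documentation; marked
/// `ignore` because it needs an AVX-512 capable CPU at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the `avx512f` feature was just verified at runtime.
///     unsafe {
///         let a = _mm_set1_epi16(-300);
///         let r = _mm512_cvtepi16_epi64(a);
///         // All eight 16-bit lanes are sign-extended to 64 bits.
///         assert_eq!(_mm512_reduce_add_epi64(r), -300 * 8);
///     }
/// }
/// ```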
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i16x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
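///
/// # Example
///
/// A minimal usage sketch, not part of the original documentation; marked
/// `ignore` because it needs an AVX-512 capable CPU at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the `avx512f` feature was just verified at runtime.
///     unsafe {
///         let a = _mm256_set1_epi16(-1); // 0xFFFF, i.e. 65535 unsigned
///         let r = _mm512_cvtepu16_epi32(a);
///         // Zero extension yields 65535 in each of the sixteen lanes.
///         assert_eq!(_mm512_reduce_add_epi32(r), 65535 * 16);
///     }
/// }
/// ```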
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_u16x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
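///
/// # Example
///
/// A minimal usage sketch, not part of the original documentation; marked
/// `ignore` because it needs an AVX-512 capable CPU at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the `avx512f` feature was just verified at runtime.
///     unsafe {
///         let a = _mm_set1_epi16(-1); // 0xFFFF, i.e. 65535 unsigned
///         let r = _mm512_cvtepu16_epi64(a);
///         // Zero extension yields 65535 in each of the eight i64 lanes.
///         assert_eq!(_mm512_reduce_add_epi64(r), 65535 * 8);
///     }
/// }
/// ```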
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_u16x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
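///
/// # Example
///
/// A minimal usage sketch, not part of the original documentation; marked
/// `ignore` because it needs an AVX-512 capable CPU at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the `avx512f` feature was just verified at runtime.
///     unsafe {
///         let a = _mm256_set1_epi32(i32::MIN);
///         let r = _mm512_cvtepi32_epi64(a);
///         // Sign extension keeps i32::MIN intact in every i64 lane.
///         assert_eq!(_mm512_reduce_add_epi64(r), i32::MIN as i64 * 8);
///     }
/// }
/// ```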
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i32x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
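///
/// # Example
///
/// A minimal usage sketch, not part of the original documentation; marked
/// `ignore` because it needs an AVX-512 capable CPU at runtime:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: the `avx512f` feature was just verified at runtime.
///     unsafe {
///         let a = _mm256_set1_epi32(-1); // 0xFFFF_FFFF, i.e. u32::MAX
///         let r = _mm512_cvtepu32_epi64(a);
///         // Zero extension yields u32::MAX in each of the eight i64 lanes.
///         assert_eq!(_mm512_reduce_add_epi64(r), u32::MAX as i64 * 8);
///     }
/// }
/// ```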
12737#[inline]
12738#[target_feature(enable = "avx512f")]
12739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12740#[cfg_attr(test, assert_instr(vpmovzxdq))]
12741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12742pub const fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12743    unsafe {
12744        let a = a.as_u32x8();
12745        transmute::<i64x8, _>(simd_cast(a))
12746    }
12747}
12748
12749/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12750///
12751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12752#[inline]
12753#[target_feature(enable = "avx512f")]
12754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12755#[cfg_attr(test, assert_instr(vpmovzxdq))]
12756#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
12757pub const fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12758    unsafe {
12759        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
12760        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12761    }
12762}
12763
12764/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12765///
12766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
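///
/// # Examples
///
/// A minimal sketch (illustrative only, `ignore`d because doctests cannot
/// assume AVX-512F hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-42);
///     let r = _mm512_cvtepi32_ps(a);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert_eq!(out, [-42.0f32; 16]);
/// }
/// ```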
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
    unsafe {
        let a = a.as_i32x16();
        transmute::<f32x16, _>(simd_cast(a))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
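///
/// # Examples
///
/// A minimal sketch of the writemask convention (illustrative only, `ignore`d
/// because doctests cannot assume AVX-512F hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm512_set1_ps(9.0);
///     let a = _mm512_set1_epi32(7);
///     // The low eight lanes are converted; the high eight are copied
///     // unchanged from `src`.
///     let r = _mm512_mask_cvtepi32_ps(src, 0x00FF, a);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert_eq!(out[..8], [7.0f32; 8]);
///     assert_eq!(out[8..], [9.0f32; 8]);
/// }
/// ```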
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
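///
/// # Examples
///
/// A minimal sketch (illustrative only, `ignore`d because doctests cannot
/// assume AVX-512F hardware). Note the narrower `__m256i` source: eight
/// 32-bit lanes widen into eight 64-bit lanes:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_set1_epi32(-3);
///     let r = _mm512_cvtepi32_pd(a);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [-3.0f64; 8]);
/// }
/// ```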
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
    unsafe {
        let a = a.as_i32x8();
        transmute::<f64x8, _>(simd_cast(a))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
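///
/// # Examples
///
/// A minimal sketch of the unsigned interpretation (illustrative only,
/// `ignore`d because doctests cannot assume AVX-512F hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // All-ones lanes read as u32::MAX (4294967295), not as -1.
///     let a = _mm512_set1_epi32(-1);
///     let r = _mm512_cvtepu32_ps(a);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     // Both sides round to the nearest representable f32, 2^32.
///     assert_eq!(out, [u32::MAX as f32; 16]);
/// }
/// ```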
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
    unsafe {
        let a = a.as_u32x16();
        transmute::<f32x16, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
    unsafe {
        let a = a.as_u32x8();
        transmute::<f64x8, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
    unsafe {
        let a = a.as_u32x4();
        transmute::<f64x4, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
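///
/// # Examples
///
/// A minimal sketch (illustrative only, `ignore`d because doctests cannot
/// assume AVX-512F/VL hardware). Only the two low 32-bit lanes of `a`
/// participate:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Lanes of `a`, low to high: [1, 2, 7, 7]; the upper two are ignored.
///     let a = _mm_set_epi32(7, 7, 2, 1);
///     let r = _mm_cvtepu32_pd(a);
///     let mut out = [0.0f64; 2];
///     _mm_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [1.0, 2.0]);
/// }
/// ```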
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a = a.as_u32x4();
        // Keep only the two low unsigned 32-bit lanes, then widen them to f64.
        let lo: u32x2 = simd_shuffle!(a, a, [0, 1]);
        transmute::<f64x2, _>(simd_cast(lo))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
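///
/// # Examples
///
/// A minimal sketch (illustrative only, `ignore`d because doctests cannot
/// assume AVX-512F hardware). Only the low eight 32-bit lanes of the 512-bit
/// source are converted:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Arguments to _mm512_set_epi32 run from the highest lane down to
///     // lane 0, so the low half holds 1..=8.
///     let a = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 7, 6, 5, 4, 3, 2, 1);
///     let r = _mm512_cvtepi32lo_pd(a);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
/// }
/// ```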
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
    unsafe {
        let v2 = v2.as_i32x16();
        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
    unsafe {
        let v2 = v2.as_u32x16();
        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    unsafe {
        let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
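///
/// # Examples
///
/// A minimal sketch of the truncating (not saturating) behavior (illustrative
/// only, `ignore`d because doctests cannot assume AVX-512F hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // 0x0001_0002 does not fit in 16 bits; only the low half survives.
///     let a = _mm512_set1_epi32(0x0001_0002);
///     let r = _mm512_cvtepi32_epi16(a);
///     let mut out = [0i16; 16];
///     _mm256_storeu_si256(out.as_mut_ptr().cast(), r);
///     assert_eq!(out, [0x0002i16; 16]);
/// }
/// ```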
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i32x16();
        transmute::<i16x16, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
    unsafe {
        let a = a.as_i32x8();
        transmute::<i16x8, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
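///
/// # Examples
///
/// A minimal sketch (illustrative only, `ignore`d because doctests cannot
/// assume AVX-512F/VL hardware). The four truncated results occupy the low
/// 64 bits of the destination; the upper lanes are zeroed:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set1_epi32(5);
///     let r = _mm_cvtepi32_epi16(a);
///     let mut out = [0i16; 8];
///     _mm_storeu_si128(out.as_mut_ptr().cast(), r);
///     assert_eq!(out, [5, 5, 5, 5, 0, 0, 0, 0]);
/// }
/// ```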
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
    unsafe {
        let a = a.as_i32x16();
        transmute::<i8x16, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    unsafe {
        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    unsafe {
        let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
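///
/// # Examples
///
/// A minimal sketch (illustrative only, `ignore`d because doctests cannot
/// assume AVX-512F hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Truncating -1i64 keeps the low 32 bits, which is -1i32 again.
///     let a = _mm512_set1_epi64(-1);
///     let r = _mm512_cvtepi64_epi32(a);
///     let mut out = [0i32; 8];
///     _mm256_storeu_si256(out.as_mut_ptr().cast(), r);
///     assert_eq!(out, [-1i32; 8]);
/// }
/// ```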
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
    unsafe {
        let a = a.as_i64x8();
        transmute::<i32x8, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
    unsafe {
        let a = a.as_i64x4();
        transmute::<i32x4, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
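///
/// # Examples
///
/// A minimal sketch (illustrative only, `ignore`d because doctests cannot
/// assume AVX-512F/VL hardware). The two truncated results occupy the low
/// 64 bits of the destination; the upper lanes are zeroed:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_epi64x(2, 1); // lanes, low to high: [1, 2]
///     let r = _mm_cvtepi64_epi32(a);
///     let mut out = [0i32; 4];
///     _mm_storeu_si128(out.as_mut_ptr().cast(), r);
///     assert_eq!(out, [1, 2, 0, 0]);
/// }
/// ```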
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
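///
/// # Examples
///
/// A minimal sketch of the truncating behavior (illustrative only, `ignore`d
/// because doctests cannot assume AVX-512F hardware):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // 0x0003_0000_0002 keeps only its low 16 bits, i.e. 2.
///     let a = _mm512_set1_epi64(0x0003_0000_0002);
///     let r = _mm512_cvtepi64_epi16(a);
///     let mut out = [0i16; 8];
///     _mm_storeu_si128(out.as_mut_ptr().cast(), r);
///     assert_eq!(out, [2i16; 8]);
/// }
/// ```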
13652#[inline]
13653#[target_feature(enable = "avx512f")]
13654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13655#[cfg_attr(test, assert_instr(vpmovqw))]
13656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13657pub const fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13658    unsafe {
13659        let a = a.as_i64x8();
13660        transmute::<i16x8, _>(simd_cast(a))
13661    }
13662}
13663
13664/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13670#[cfg_attr(test, assert_instr(vpmovqw))]
13671#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13672pub const fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13673    unsafe {
13674        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13675        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
13676    }
13677}
13678
13679/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13680///
13681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13682#[inline]
13683#[target_feature(enable = "avx512f")]
13684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13685#[cfg_attr(test, assert_instr(vpmovqw))]
13686#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
13687pub const fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13688    unsafe {
13689        let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
13690        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
13691    }
13692}
13693
13694/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13695///
13696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13697#[inline]
13698#[target_feature(enable = "avx512f,avx512vl")]
13699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13700#[cfg_attr(test, assert_instr(vpmovqw))]
13701pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13702    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
13703}
13704
13705/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13706///
13707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13708#[inline]
13709#[target_feature(enable = "avx512f,avx512vl")]
13710#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13711#[cfg_attr(test, assert_instr(vpmovqw))]
13712pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13713    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
13714}
13715
13716/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13717///
13718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13719#[inline]
13720#[target_feature(enable = "avx512f,avx512vl")]
13721#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13722#[cfg_attr(test, assert_instr(vpmovqw))]
13723pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13724    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
13725}
13726
13727/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13728///
13729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13730#[inline]
13731#[target_feature(enable = "avx512f,avx512vl")]
13732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13733#[cfg_attr(test, assert_instr(vpmovqw))]
13734pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13735    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
13736}
13737
13738/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13739///
13740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13741#[inline]
13742#[target_feature(enable = "avx512f,avx512vl")]
13743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13744#[cfg_attr(test, assert_instr(vpmovqw))]
13745pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13746    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
13747}
13748
13749/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13750///
13751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13752#[inline]
13753#[target_feature(enable = "avx512f,avx512vl")]
13754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13755#[cfg_attr(test, assert_instr(vpmovqw))]
13756pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13757    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
13758}
13759
13760/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13761///
13762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13763#[inline]
13764#[target_feature(enable = "avx512f")]
13765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13766#[cfg_attr(test, assert_instr(vpmovqb))]
13767pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13768    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
13769}
13770
13771/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13772///
13773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13774#[inline]
13775#[target_feature(enable = "avx512f")]
13776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13777#[cfg_attr(test, assert_instr(vpmovqb))]
13778pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13779    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
13780}
13781
13782/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13783///
13784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13785#[inline]
13786#[target_feature(enable = "avx512f")]
13787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13788#[cfg_attr(test, assert_instr(vpmovqb))]
13789pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13790    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
13791}
13792
13793/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13794///
13795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13796#[inline]
13797#[target_feature(enable = "avx512f,avx512vl")]
13798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13799#[cfg_attr(test, assert_instr(vpmovqb))]
13800pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13801    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
13802}
13803
13804/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13805///
13806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13807#[inline]
13808#[target_feature(enable = "avx512f,avx512vl")]
13809#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13810#[cfg_attr(test, assert_instr(vpmovqb))]
13811pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13812    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
13813}
13814
13815/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13816///
13817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13818#[inline]
13819#[target_feature(enable = "avx512f,avx512vl")]
13820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13821#[cfg_attr(test, assert_instr(vpmovqb))]
13822pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13823    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
13824}
13825
13826/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13827///
13828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13829#[inline]
13830#[target_feature(enable = "avx512f,avx512vl")]
13831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13832#[cfg_attr(test, assert_instr(vpmovqb))]
13833pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13834    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
13835}
13836
13837/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13838///
13839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13840#[inline]
13841#[target_feature(enable = "avx512f,avx512vl")]
13842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13843#[cfg_attr(test, assert_instr(vpmovqb))]
13844pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13845    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
13846}
13847
13848/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13849///
13850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13851#[inline]
13852#[target_feature(enable = "avx512f,avx512vl")]
13853#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13854#[cfg_attr(test, assert_instr(vpmovqb))]
13855pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13856    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
13857}
13858
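// Illustrative contrast (added; not in the upstream source): the plain
// `cvt` intrinsics above truncate, keeping only the low bits, while the
// `cvts` intrinsics below saturate to the destination range.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _demo_truncation_vs_saturation() {
    unsafe {
        let a = _mm512_set1_epi64(511); // 0x1FF: low 8 bits are 0xFF
        let t: [i8; 16] = mem::transmute(_mm512_cvtepi64_epi8(a));
        assert_eq!(t[0], -1); // truncation wraps: 0xFF reads back as -1
        let s: [i8; 16] = mem::transmute(_mm512_cvtsepi64_epi8(a));
        assert_eq!(s[0], i8::MAX); // saturation clamps 511 to 127
    }
}
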
13859/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13860///
13861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13862#[inline]
13863#[target_feature(enable = "avx512f")]
13864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13865#[cfg_attr(test, assert_instr(vpmovsdw))]
13866pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13867    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
13868}
13869
13870/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13871///
13872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13873#[inline]
13874#[target_feature(enable = "avx512f")]
13875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13876#[cfg_attr(test, assert_instr(vpmovsdw))]
13877pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13878    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
13879}
13880
13881/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13882///
13883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1821)
13884#[inline]
13885#[target_feature(enable = "avx512f")]
13886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13887#[cfg_attr(test, assert_instr(vpmovsdw))]
13888pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13889    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
13890}
13891
13892/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13893///
13894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13895#[inline]
13896#[target_feature(enable = "avx512f,avx512vl")]
13897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13898#[cfg_attr(test, assert_instr(vpmovsdw))]
13899pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13900    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
13901}
13902
13903/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13904///
13905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13906#[inline]
13907#[target_feature(enable = "avx512f,avx512vl")]
13908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13909#[cfg_attr(test, assert_instr(vpmovsdw))]
13910pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13911    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
13912}
13913
13914/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13915///
13916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13917#[inline]
13918#[target_feature(enable = "avx512f,avx512vl")]
13919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13920#[cfg_attr(test, assert_instr(vpmovsdw))]
13921pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13922    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
13923}
13924
13925/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13926///
13927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13928#[inline]
13929#[target_feature(enable = "avx512f,avx512vl")]
13930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13931#[cfg_attr(test, assert_instr(vpmovsdw))]
13932pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13933    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13934}
13935
13936/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13937///
13938/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13939#[inline]
13940#[target_feature(enable = "avx512f,avx512vl")]
13941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13942#[cfg_attr(test, assert_instr(vpmovsdw))]
13943pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13944    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13945}
13946
13947/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13948///
13949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13950#[inline]
13951#[target_feature(enable = "avx512f,avx512vl")]
13952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13953#[cfg_attr(test, assert_instr(vpmovsdw))]
13954pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13955    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13956}
13957
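// A minimal sketch (added; not in the upstream source) of how signed
// saturation clamps in both directions when narrowing 32-bit lanes.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _demo_cvtsepi32_epi16_saturates() {
    unsafe {
        let a = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100_000, 70_000, 3);
        let r: [i16; 16] = mem::transmute(_mm512_cvtsepi32_epi16(a));
        assert_eq!(r[0], 3); // in range: unchanged
        assert_eq!(r[1], i16::MAX); // 70_000 clamps to 32767
        assert_eq!(r[2], i16::MIN); // -100_000 clamps to -32768
    }
}
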
13958/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13959///
13960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13961#[inline]
13962#[target_feature(enable = "avx512f")]
13963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13964#[cfg_attr(test, assert_instr(vpmovsdb))]
13965pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13966    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
13967}
13968
13969/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13970///
13971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13972#[inline]
13973#[target_feature(enable = "avx512f")]
13974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13975#[cfg_attr(test, assert_instr(vpmovsdb))]
13976pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13977    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
13978}
13979
13980/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13981///
13982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13983#[inline]
13984#[target_feature(enable = "avx512f")]
13985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13986#[cfg_attr(test, assert_instr(vpmovsdb))]
13987pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13988    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
13989}
13990
13991/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13992///
13993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13994#[inline]
13995#[target_feature(enable = "avx512f,avx512vl")]
13996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13997#[cfg_attr(test, assert_instr(vpmovsdb))]
13998pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13999    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
14000}
14001
14002/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14003///
14004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
14005#[inline]
14006#[target_feature(enable = "avx512f,avx512vl")]
14007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14008#[cfg_attr(test, assert_instr(vpmovsdb))]
14009pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14010    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
14011}
14012
14013/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14014///
14015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
14016#[inline]
14017#[target_feature(enable = "avx512f,avx512vl")]
14018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14019#[cfg_attr(test, assert_instr(vpmovsdb))]
14020pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14021    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
14022}
14023
14024/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14025///
14026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
14027#[inline]
14028#[target_feature(enable = "avx512f,avx512vl")]
14029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14030#[cfg_attr(test, assert_instr(vpmovsdb))]
14031pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
14032    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
14033}
14034
14035/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14036///
14037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
14038#[inline]
14039#[target_feature(enable = "avx512f,avx512vl")]
14040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14041#[cfg_attr(test, assert_instr(vpmovsdb))]
14042pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14043    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
14044}
14045
14046/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14047///
14048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
14049#[inline]
14050#[target_feature(enable = "avx512f,avx512vl")]
14051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14052#[cfg_attr(test, assert_instr(vpmovsdb))]
14053pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14054    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
14055}
14056
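// Sketch (added; not in the upstream source): with sixteen 32-bit
// sources the 8-bit results fill the whole `__m128i`, so the writemask
// here is a full `__mmask16`.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _demo_cvtsepi32_epi8_full_mask16() {
    unsafe {
        let a = _mm512_set1_epi32(200); // clamps to 127 per lane
        let src = _mm_set1_epi8(9);
        let r: [i8; 16] = mem::transmute(_mm512_mask_cvtsepi32_epi8(src, 0x00FF, a));
        assert_eq!(&r[..8], &[127i8; 8]); // low half converted and clamped
        assert_eq!(&r[8..], &[9i8; 8]); // high half copied from `src`
    }
}
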
14057/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14058///
14059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
14060#[inline]
14061#[target_feature(enable = "avx512f")]
14062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14063#[cfg_attr(test, assert_instr(vpmovsqd))]
14064pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
14065    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
14066}
14067
14068/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14069///
14070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
14071#[inline]
14072#[target_feature(enable = "avx512f")]
14073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14074#[cfg_attr(test, assert_instr(vpmovsqd))]
14075pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14076    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
14077}
14078
14079/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14080///
14081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
14082#[inline]
14083#[target_feature(enable = "avx512f")]
14084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14085#[cfg_attr(test, assert_instr(vpmovsqd))]
14086pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14087    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
14088}
14089
14090/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14091///
14092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
14093#[inline]
14094#[target_feature(enable = "avx512f,avx512vl")]
14095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14096#[cfg_attr(test, assert_instr(vpmovsqd))]
14097pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
14098    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
14099}
14100
14101/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14102///
14103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
14104#[inline]
14105#[target_feature(enable = "avx512f,avx512vl")]
14106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14107#[cfg_attr(test, assert_instr(vpmovsqd))]
14108pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14109    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
14110}
14111
14112/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14113///
14114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
14115#[inline]
14116#[target_feature(enable = "avx512f,avx512vl")]
14117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14118#[cfg_attr(test, assert_instr(vpmovsqd))]
14119pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14120    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
14121}
14122
14123/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
14124///
14125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
14126#[inline]
14127#[target_feature(enable = "avx512f,avx512vl")]
14128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14129#[cfg_attr(test, assert_instr(vpmovsqd))]
14130pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
14131    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
14132}
14133
14134/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14135///
14136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
14137#[inline]
14138#[target_feature(enable = "avx512f,avx512vl")]
14139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14140#[cfg_attr(test, assert_instr(vpmovsqd))]
14141pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14142    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
14143}
14144
14145/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14146///
14147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
14148#[inline]
14149#[target_feature(enable = "avx512f,avx512vl")]
14150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14151#[cfg_attr(test, assert_instr(vpmovsqd))]
14152pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14153    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
14154}
14155
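// Sketch (added; not in the upstream source): narrowing i64 lanes to
// i32 with signed saturation clamps anything beyond the i32 range.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _demo_cvtsepi64_epi32_saturates() {
    unsafe {
        let a = _mm512_set1_epi64(1 << 40); // far above i32::MAX
        let r: [i32; 8] = mem::transmute(_mm512_cvtsepi64_epi32(a));
        assert_eq!(r, [i32::MAX; 8]); // every lane clamps to 2147483647
    }
}
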
14156/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14157///
14158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
14159#[inline]
14160#[target_feature(enable = "avx512f")]
14161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14162#[cfg_attr(test, assert_instr(vpmovsqw))]
14163pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
14164    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
14165}
14166
14167/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14168///
14169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
14170#[inline]
14171#[target_feature(enable = "avx512f")]
14172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14173#[cfg_attr(test, assert_instr(vpmovsqw))]
14174pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14175    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
14176}
14177
14178/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14179///
14180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
14181#[inline]
14182#[target_feature(enable = "avx512f")]
14183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14184#[cfg_attr(test, assert_instr(vpmovsqw))]
14185pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14186    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
14187}
14188
14189/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14190///
14191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
14192#[inline]
14193#[target_feature(enable = "avx512f,avx512vl")]
14194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14195#[cfg_attr(test, assert_instr(vpmovsqw))]
14196pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
14197    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
14198}
14199
14200/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14201///
14202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
14203#[inline]
14204#[target_feature(enable = "avx512f,avx512vl")]
14205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14206#[cfg_attr(test, assert_instr(vpmovsqw))]
14207pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14208    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
14209}
14210
14211/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14212///
14213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
14214#[inline]
14215#[target_feature(enable = "avx512f,avx512vl")]
14216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14217#[cfg_attr(test, assert_instr(vpmovsqw))]
14218pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14219    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
14220}
14221
14222/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
14223///
14224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
14225#[inline]
14226#[target_feature(enable = "avx512f,avx512vl")]
14227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14228#[cfg_attr(test, assert_instr(vpmovsqw))]
14229pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
14230    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
14231}
14232
14233/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14234///
14235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
14236#[inline]
14237#[target_feature(enable = "avx512f,avx512vl")]
14238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14239#[cfg_attr(test, assert_instr(vpmovsqw))]
14240pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14241    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
14242}
14243
14244/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14245///
14246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
14247#[inline]
14248#[target_feature(enable = "avx512f,avx512vl")]
14249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14250#[cfg_attr(test, assert_instr(vpmovsqw))]
14251pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14252    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
14253}
14254
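// Sketch (added; not in the upstream source): the 256-bit source
// variant writes four i16 results and zeroes the remaining lanes of
// the `__m128i` destination.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _demo_mm256_cvtsepi64_epi16_layout() {
    unsafe {
        let a = _mm256_set_epi64x(4, 3, -70_000, 1);
        let r: [i16; 8] = mem::transmute(_mm256_cvtsepi64_epi16(a));
        assert_eq!(r, [1, i16::MIN, 3, 4, 0, 0, 0, 0]); // upper lanes zeroed
    }
}
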
14255/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14256///
14257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
14258#[inline]
14259#[target_feature(enable = "avx512f")]
14260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14261#[cfg_attr(test, assert_instr(vpmovsqb))]
14262pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
14263    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
14264}
14265
14266/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14267///
14268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
14269#[inline]
14270#[target_feature(enable = "avx512f")]
14271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14272#[cfg_attr(test, assert_instr(vpmovsqb))]
14273pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14274    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
14275}
14276
14277/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14278///
14279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
14280#[inline]
14281#[target_feature(enable = "avx512f")]
14282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14283#[cfg_attr(test, assert_instr(vpmovsqb))]
14284pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14285    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
14286}
14287
14288/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14289///
14290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
14291#[inline]
14292#[target_feature(enable = "avx512f,avx512vl")]
14293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14294#[cfg_attr(test, assert_instr(vpmovsqb))]
14295pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
14296    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
14297}
14298
14299/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14300///
14301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
14302#[inline]
14303#[target_feature(enable = "avx512f,avx512vl")]
14304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14305#[cfg_attr(test, assert_instr(vpmovsqb))]
14306pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14307    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
14308}
14309
14310/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14311///
14312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
14313#[inline]
14314#[target_feature(enable = "avx512f,avx512vl")]
14315#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14316#[cfg_attr(test, assert_instr(vpmovsqb))]
14317pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14318    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
14319}
14320
14321/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
14322///
14323/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
14324#[inline]
14325#[target_feature(enable = "avx512f,avx512vl")]
14326#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14327#[cfg_attr(test, assert_instr(vpmovsqb))]
14328pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
14329    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
14330}
14331
14332/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14333///
14334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
14335#[inline]
14336#[target_feature(enable = "avx512f,avx512vl")]
14337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14338#[cfg_attr(test, assert_instr(vpmovsqb))]
14339pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14340    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
14341}
14342
14343/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14344///
14345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
14346#[inline]
14347#[target_feature(enable = "avx512f,avx512vl")]
14348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14349#[cfg_attr(test, assert_instr(vpmovsqb))]
14350pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14351    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
14352}
14353
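// Sketch (added; not in the upstream source): the `cvtus` intrinsics
// below treat the source as unsigned, so an all-ones bit pattern
// saturates to the unsigned maximum instead of staying -1.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _demo_signed_vs_unsigned_saturation() {
    unsafe {
        let a = _mm512_set1_epi64(-1);
        let s: [i8; 16] = mem::transmute(_mm512_cvtsepi64_epi8(a));
        assert_eq!(s[0], -1); // signed: -1 is in range and kept as-is
        let u: [u8; 16] = mem::transmute(_mm512_cvtusepi64_epi8(a));
        assert_eq!(u[0], u8::MAX); // unsigned view: u64::MAX clamps to 255
    }
}
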
14354/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14355///
14356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
14357#[inline]
14358#[target_feature(enable = "avx512f")]
14359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14360#[cfg_attr(test, assert_instr(vpmovusdw))]
14361pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
14362    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
14363}
14364
14365/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14366///
14367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
14368#[inline]
14369#[target_feature(enable = "avx512f")]
14370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14371#[cfg_attr(test, assert_instr(vpmovusdw))]
14372pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
14373    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
14374}
14375
14376/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14377///
14378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
14379#[inline]
14380#[target_feature(enable = "avx512f")]
14381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14382#[cfg_attr(test, assert_instr(vpmovusdw))]
14383pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
14384    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
14385}
14386
14387/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14388///
14389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
14390#[inline]
14391#[target_feature(enable = "avx512f,avx512vl")]
14392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14393#[cfg_attr(test, assert_instr(vpmovusdw))]
14394pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14395    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
14396}
14397
14398/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14399///
14400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14401#[inline]
14402#[target_feature(enable = "avx512f,avx512vl")]
14403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14404#[cfg_attr(test, assert_instr(vpmovusdw))]
14405pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14406    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
14407}
14408
14409/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14410///
14411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14412#[inline]
14413#[target_feature(enable = "avx512f,avx512vl")]
14414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14415#[cfg_attr(test, assert_instr(vpmovusdw))]
14416pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14417    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14418}
14419
14420/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14421///
14422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14423#[inline]
14424#[target_feature(enable = "avx512f,avx512vl")]
14425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14426#[cfg_attr(test, assert_instr(vpmovusdw))]
14427pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14428    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14429}
14430
14431/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14432///
14433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14434#[inline]
14435#[target_feature(enable = "avx512f,avx512vl")]
14436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14437#[cfg_attr(test, assert_instr(vpmovusdw))]
14438pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14439    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14440}
14441
14442/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14443///
14444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14445#[inline]
14446#[target_feature(enable = "avx512f,avx512vl")]
14447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14448#[cfg_attr(test, assert_instr(vpmovusdw))]
14449pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14450    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14451}
14452
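// Sketch (added; not in the upstream source): unsigned saturation
// clamps any u32 above u16::MAX to 65535.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _demo_cvtusepi32_epi16_saturates() {
    unsafe {
        let a = _mm512_set1_epi32(1 << 20); // exceeds u16::MAX
        let r: [u16; 16] = mem::transmute(_mm512_cvtusepi32_epi16(a));
        assert_eq!(r, [u16::MAX; 16]);
    }
}
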
14453/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14454///
14455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14456#[inline]
14457#[target_feature(enable = "avx512f")]
14458#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14459#[cfg_attr(test, assert_instr(vpmovusdb))]
14460pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14461    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14462}
14463
14464/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14465///
14466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14467#[inline]
14468#[target_feature(enable = "avx512f")]
14469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14470#[cfg_attr(test, assert_instr(vpmovusdb))]
14471pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14472    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14473}
14474
14475/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14476///
14477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14478#[inline]
14479#[target_feature(enable = "avx512f")]
14480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14481#[cfg_attr(test, assert_instr(vpmovusdb))]
14482pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14483    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14484}
14485
14486/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14487///
14488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14489#[inline]
14490#[target_feature(enable = "avx512f,avx512vl")]
14491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14492#[cfg_attr(test, assert_instr(vpmovusdb))]
14493pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14494    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14495}
14496
14497/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14498///
14499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14500#[inline]
14501#[target_feature(enable = "avx512f,avx512vl")]
14502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14503#[cfg_attr(test, assert_instr(vpmovusdb))]
14504pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14505    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14506}
14507
14508/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14509///
14510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14511#[inline]
14512#[target_feature(enable = "avx512f,avx512vl")]
14513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14514#[cfg_attr(test, assert_instr(vpmovusdb))]
14515pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14516    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14517}
14518
14519/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14520///
14521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14522#[inline]
14523#[target_feature(enable = "avx512f,avx512vl")]
14524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14525#[cfg_attr(test, assert_instr(vpmovusdb))]
14526pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14527    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14528}
14529
14530/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14531///
14532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14533#[inline]
14534#[target_feature(enable = "avx512f,avx512vl")]
14535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14536#[cfg_attr(test, assert_instr(vpmovusdb))]
14537pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14538    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14539}
14540
14541/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14542///
14543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14544#[inline]
14545#[target_feature(enable = "avx512f,avx512vl")]
14546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14547#[cfg_attr(test, assert_instr(vpmovusdb))]
14548pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14549    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14550}
14551
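// Sketch (added; not in the upstream source): zeromask behaviour for
// the unsigned 32-to-8 narrowing.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn _demo_maskz_cvtusepi32_epi8() {
    unsafe {
        let a = _mm512_set1_epi32(300); // exceeds u8::MAX
        let r: [u8; 16] = mem::transmute(_mm512_maskz_cvtusepi32_epi8(0b1, a));
        assert_eq!(r[0], u8::MAX); // lane 0 converted and clamped to 255
        assert_eq!(&r[1..], &[0u8; 15]); // every other lane zeroed by the mask
    }
}
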
14552/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14553///
14554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14555#[inline]
14556#[target_feature(enable = "avx512f")]
14557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14558#[cfg_attr(test, assert_instr(vpmovusqd))]
14559pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14560    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14561}
14562
14563/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14564///
14565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14566#[inline]
14567#[target_feature(enable = "avx512f")]
14568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14569#[cfg_attr(test, assert_instr(vpmovusqd))]
14570pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14571    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14572}
14573
14574/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14575///
14576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14577#[inline]
14578#[target_feature(enable = "avx512f")]
14579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14580#[cfg_attr(test, assert_instr(vpmovusqd))]
14581pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14582    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14583}
14584
14585/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14586///
14587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14588#[inline]
14589#[target_feature(enable = "avx512f,avx512vl")]
14590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14591#[cfg_attr(test, assert_instr(vpmovusqd))]
14592pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14593    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14594}
14595
14596/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14597///
14598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14599#[inline]
14600#[target_feature(enable = "avx512f,avx512vl")]
14601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14602#[cfg_attr(test, assert_instr(vpmovusqd))]
14603pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14604    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14605}
14606
14607/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14608///
14609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14610#[inline]
14611#[target_feature(enable = "avx512f,avx512vl")]
14612#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14613#[cfg_attr(test, assert_instr(vpmovusqd))]
14614pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14615    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14616}
14617
14618/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14619///
14620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14621#[inline]
14622#[target_feature(enable = "avx512f,avx512vl")]
14623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14624#[cfg_attr(test, assert_instr(vpmovusqd))]
14625pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14626    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14627}
14628
14629/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14632#[inline]
14633#[target_feature(enable = "avx512f,avx512vl")]
14634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14635#[cfg_attr(test, assert_instr(vpmovusqd))]
14636pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14637    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14638}
14639
14640/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14641///
14642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14643#[inline]
14644#[target_feature(enable = "avx512f,avx512vl")]
14645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14646#[cfg_attr(test, assert_instr(vpmovusqd))]
14647pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14648    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14649}
14650
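// Sketch (added; not in the upstream source): the avx512vl variants
// behave the same on narrower vectors; the two u64 lanes of a `__m128i`
// land in the low half of the output, and the rest is zeroed.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _demo_mm_cvtusepi64_epi32() {
    unsafe {
        let a = _mm_set_epi64x(1 << 40, 5);
        let r: [u32; 4] = mem::transmute(_mm_cvtusepi64_epi32(a));
        assert_eq!(r, [5, u32::MAX, 0, 0]); // in range, clamped, zero tail
    }
}
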
14651/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14652///
14653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14654#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
}

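// Illustrative, test-only sketch; the `demo_*` helper name is hypothetical and
// not part of this module's API. It shows the behavior documented above: each
// u64 lane narrows to a u16 lane, and values above u16::MAX saturate to 0xFFFF
// instead of being truncated.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvtusepi64_epi16_saturates() {
    let wide = _mm512_set1_epi64(0x1_0000); // 65536: one past u16::MAX
    let narrow = _mm512_cvtusepi64_epi16(wide);
    // The two lowest u16 lanes are both 0xFFFF, so the low 32 bits read as -1.
    assert_eq!(_mm_cvtsi128_si32(narrow), -1i32);
}
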
/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
}

/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
}

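// Illustrative, test-only sketch (hypothetical `demo_*` name): the same
// saturating narrowing, to bytes this time, combined with a zeromask so that
// lanes whose mask bit is clear come out as zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvtusepi64_epi8_maskz() {
    let wide = _mm512_set1_epi64(300); // 300 > u8::MAX, saturates to 0xFF
    let narrow = _mm512_maskz_cvtusepi64_epi8(0b0101_0101, wide);
    // Lanes 0, 2 hold 0xFF; lanes 1, 3 were zeroed by the mask, giving
    // 0x00FF_00FF in the low 32 bits.
    assert_eq!(_mm_cvtsi128_si32(narrow), 0x00FF_00FF);
}
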
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i32x16();
        let r = vcvtps2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}

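// Illustrative, test-only sketch (hypothetical `demo_*` name): the ROUNDING
// const generic selects the rounding mode per call, so the same input converts
// differently under round-up versus truncation.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvt_roundps_epi32_modes() {
    let a = _mm512_set1_ps(1.5);
    let up = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    let trunc = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    // 1.5 rounds up to 2 but truncates to 1.
    assert_eq!(_mm_cvtsi128_si32(_mm512_castsi512_si128(up)), 2);
    assert_eq!(_mm_cvtsi128_si32(_mm512_castsi512_si128(trunc)), 1);
}
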
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_u32x16();
        let r = vcvtps2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}

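// Illustrative, test-only sketch (hypothetical `demo_*` name): the writemask
// variant keeps `src` lanes wherever a mask bit is clear, which is how a
// partial conversion is merged into an existing register.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_mask_cvt_roundps_epu32() {
    let src = _mm512_set1_epi32(7);
    let a = _mm512_set1_ps(3.0);
    // Only lane 0 is converted; lanes 1..15 are copied from `src`.
    let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        src, 0b0000_0000_0000_0001, a,
    );
    assert_eq!(_mm_cvtsi128_si32(_mm512_castsi512_si128(r)), 3);
}
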
/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1348)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let src = src.as_f64x8();
        let r = vcvtps2pd(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1349)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}

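// Illustrative, test-only sketch (hypothetical `demo_*` name): widening
// f32 -> f64 is always exact, so the only knob here is SAE; passing
// _MM_FROUND_NO_EXC suppresses exception reporting, while
// _MM_FROUND_CUR_DIRECTION leaves MXCSR in charge.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvt_roundps_pd_sae() {
    let a = _mm256_set1_ps(0.5);
    let r = _mm512_cvt_roundps_pd::<_MM_FROUND_NO_EXC>(a);
    // 0.5 is exactly representable in both widths.
    assert_eq!(_mm_cvtsd_f64(_mm512_castpd512_pd128(r)), 0.5);
}
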
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvtpd2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}

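// Illustrative, test-only sketch (hypothetical `demo_*` name): eight f64 lanes
// narrow to eight i32 lanes in a __m256i, and the zeromask clears every lane
// whose mask bit is unset.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_maskz_cvt_roundpd_epi32() {
    let a = _mm512_set1_pd(-2.5);
    // Round-to-nearest-even sends -2.5 to -2; only lane 0 survives the mask.
    let r = _mm512_maskz_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        0b0000_0001, a,
    );
    assert_eq!(_mm_cvtsi128_si32(_mm256_castsi256_si128(r)), -2);
}
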
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_u32x8();
        let r = vcvtpd2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512d,
) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_f32x8();
        let r = vcvtpd2ps(a, src, k, ROUNDING);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}

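// Illustrative, test-only sketch (hypothetical `demo_*` name): narrowing
// f64 -> f32 can be inexact, so the rounding mode is observable. A value
// strictly between two adjacent f32s rounds down under truncation and up
// under round-toward-positive-infinity.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvt_roundpd_ps_modes() {
    let a = _mm512_set1_pd(1.0 + 1e-9); // between 1.0 and the next f32 up
    let down = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(_mm_cvtss_f32(_mm256_castps256_ps128(down)), 1.0);
    assert!(_mm_cvtss_f32(_mm256_castps256_ps128(up)) > 1.0);
}
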
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(r)
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

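// Illustrative, test-only sketch (hypothetical `demo_*` name): i32 -> f32 is
// inexact above 2^24 because f32 has a 24-bit significand, so the rounding
// mode is observable: 2^24 + 1 has no exact f32 representation.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvt_roundepi32_ps_modes() {
    let a = _mm512_set1_epi32((1 << 24) + 1);
    let down = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(_mm_cvtss_f32(_mm512_castps512_ps128(down)), 16_777_216.0);
    assert_eq!(_mm_cvtss_f32(_mm512_castps512_ps128(up)), 16_777_218.0);
}
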
/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(r)
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`]  // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]      // round down
///  * [`_MM_FROUND_TO_POS_INF`]      // round up
///  * [`_MM_FROUND_TO_ZERO`]         // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]   // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`]  // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]      // round down
///  * [`_MM_FROUND_TO_POS_INF`]      // round up
///  * [`_MM_FROUND_TO_ZERO`]         // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]   // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask16,
    a: __m512,
) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i16x16();
        let r = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`]  // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]      // round down
///  * [`_MM_FROUND_TO_POS_INF`]      // round up
///  * [`_MM_FROUND_TO_ZERO`]         // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]   // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
    }
}

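// Illustrative, test-only sketch (hypothetical `demo_*` name): f32 -> f16
// narrowing packs sixteen half-precision lanes into a __m256i. 1.0 is exactly
// representable in f16 (bit pattern 0x3C00), so every rounding mode produces
// the same result for this input.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_cvt_roundps_ph() {
    let a = _mm512_set1_ps(1.0);
    let r = _mm512_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    // Two adjacent f16 lanes of 0x3C00 read as 0x3C00_3C00 in the low 32 bits.
    assert_eq!(_mm_cvtsi128_si32(_mm256_castsi256_si128(r)), 0x3C00_3C00);
}
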
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_i16x8();
        let r = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let src = src.as_i16x8();
        let r = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

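// Illustrative, test-only sketch (hypothetical `demo_*` name): the VL-width
// variants above take a plain IMM8 rounding control rather than the 512-bit
// ROUNDING encoding, and they additionally require avx512vl.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn demo_mask_cvt_roundps_ph_128() {
    let src = _mm_set1_epi16(0x7777);
    let a = _mm_set1_ps(1.0);
    // Lane 0 converts to the f16 bit pattern 0x3C00; lanes 1..3 keep `src`.
    let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_TO_NEAREST_INT>(src, 0b0001, a);
    assert_eq!(_mm_cvtsi128_si32(r), 0x7777_3C00);
}
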
15628/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15629/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15630///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15631///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
15632///  * [`_MM_FROUND_TO_POS_INF`]    // round up
15633///  * [`_MM_FROUND_TO_ZERO`]        // truncate
15634///  * [`_MM_FROUND_CUR_DIRECTION`]    // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15635///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15636///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
15637///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
15638///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
15639///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15640///
15641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]     // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]  // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
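///
/// # Examples
///
/// A minimal sketch of the writemask behaviour (marked `ignore`, so it is
/// not compiled as a doctest), assuming `avx512f` support:
///
/// ```ignore
/// unsafe {
///     let src = _mm256_setzero_si256();
///     let a = _mm512_set1_ps(2.0);
///     // Only the even lanes are converted; odd lanes keep `src` (zero here).
///     let h = _mm512_mask_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(src, 0b01010101_01010101, a);
///     // `h` now holds converted f16 values in even lanes and zeros elsewhere.
/// }
/// ```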
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i16x16();
        let r = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
///  * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
///  * [`_MM_FROUND_TO_NEG_INF`]     // round down
///  * [`_MM_FROUND_TO_POS_INF`]     // round up
///  * [`_MM_FROUND_TO_ZERO`]        // truncate
///  * [`_MM_FROUND_CUR_DIRECTION`]  // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
///  * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
///  * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`]     // round down, and suppress exceptions
///  * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`]     // round up, and suppress exceptions
///  * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`]        // truncate, and suppress exceptions
///  * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`]  // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
    unsafe {
        static_assert_extended_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let src = src.as_i16x8();
        let r = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x8();
        let r = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let src = src.as_i16x8();
        let r = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
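///
/// # Examples
///
/// A minimal sketch (marked `ignore`, so it is not compiled as a doctest);
/// the 128-bit variants additionally require `avx512vl`:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set1_ps(0.5);
///     // Convert only lane 0 toward zero; lanes 1..=3 are zeroed by the mask.
///     let h = _mm_maskz_cvtps_ph::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b0001, a);
///     // The upper four i16 lanes of the __m128i result are always zero.
/// }
/// ```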
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let r = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
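///
/// # Examples
///
/// A minimal sketch (marked `ignore`, so it is not compiled as a doctest),
/// assuming `avx512f` support:
///
/// ```ignore
/// unsafe {
///     let h = _mm512_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(_mm512_set1_ps(3.0));
///     // Widen back to f32, suppressing floating-point exceptions (SAE).
///     let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(h);
///     assert_eq!(_mm512_cvtss_f32(r), 3.0);
/// }
/// ```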
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let r = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let src = src.as_f32x16();
        let r = vcvtph2ps(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_i16x16();
        let r = vcvtph2ps(a, f32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
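///
/// # Examples
///
/// A minimal sketch (marked `ignore`, so it is not compiled as a doctest),
/// assuming `avx512f` support:
///
/// ```ignore
/// unsafe {
///     // Sixteen f16 lanes packed in a __m256i, produced here from f32 inputs.
///     let h = _mm512_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(_mm512_set1_ps(-4.25));
///     let r = _mm512_cvtph_ps(h);
///     // -4.25 is exactly representable in half precision.
///     assert_eq!(_mm512_cvtss_f32(r), -4.25);
/// }
/// ```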
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
    unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
    unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
    unsafe {
        let convert = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
    }
}

/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
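///
/// # Examples
///
/// A minimal sketch (marked `ignore`, so it is not compiled as a doctest),
/// assuming `avx512f` support; truncation always rounds toward zero:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(-1.9);
///     let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
///     // -1.9 truncates toward zero, giving -1 in every lane.
///     assert_eq!(_mm512_cvtsi512_si32(r), -1);
/// }
/// ```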
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_i32x16();
        let r = vcvttps2dq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2dq(a, i32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
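///
/// # Examples
///
/// A minimal sketch (marked `ignore`, so it is not compiled as a doctest),
/// assuming `avx512f` support:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(3.7);
///     let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
///     // 3.7 truncates to the unsigned integer 3 in every lane.
///     assert_eq!(_mm512_cvtsi512_si32(r), 3);
/// }
/// ```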
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let src = src.as_u32x16();
        let r = vcvttps2udq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x16();
        let r = vcvttps2udq(a, u32x16::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvttpd2dq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvttpd2udq(a, src, k, SAE);
        transmute(r)
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
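///
/// # Examples
///
/// A minimal sketch (marked `ignore`, so it is not compiled as a doctest),
/// assuming `avx512f` support:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.99);
///     // Unlike a rounding conversion, the fractional part is always discarded.
///     let r = _mm512_cvttps_epi32(a);
///     assert_eq!(_mm512_cvtsi512_si32(r), 2);
/// }
/// ```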
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
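///
/// # Examples
///
/// A minimal sketch (marked `ignore`, so it is not compiled as a doctest),
/// assuming `avx512f` support; values above `i32::MAX` remain representable
/// in the unsigned destination:
///
/// ```ignore
/// unsafe {
///     // 3_000_000_000.0 is exactly representable as an f32.
///     let a = _mm512_set1_ps(3_000_000_000.0);
///     let r = _mm512_cvttps_epu32(a);
///     // Lane 0 reinterpreted as u32 is 3_000_000_000.
///     assert_eq!(_mm512_cvtsi512_si32(r) as u32, 3_000_000_000);
/// }
/// ```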
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
}

/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x8();
        let r = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
        transmute(r)
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
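///
/// # Examples
///
/// A minimal sketch (marked `ignore`, so it is not compiled as a doctest),
/// assuming `avx512f` support; eight f64 lanes narrow to eight i32 lanes in
/// a `__m256i`:
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_pd(-7.99);
///     let r = _mm512_cvttpd_epi32(a);
///     // Truncation toward zero discards the fractional part.
///     assert_eq!(_mm256_extract_epi32::<0>(r), -7);
/// }
/// ```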
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
}

/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
}

/// Returns vector of type `__m512d` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
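///
/// # Examples
///
/// A minimal sketch (marked `ignore`, so it is not compiled as a doctest),
/// assuming `avx512f` support:
///
/// ```ignore
/// unsafe {
///     let z = _mm512_setzero_pd();
///     // Every f64 lane is +0.0.
///     assert_eq!(_mm512_cvtsd_f64(z), 0.0);
/// }
/// ```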
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_pd() -> __m512d {
    // All-0 is a properly initialized __m512d
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_ps() -> __m512 {
    // All-0 is a properly initialized __m512
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero() -> __m512 {
    // All-0 is a properly initialized __m512
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_si512() -> __m512i {
    // All-0 is a properly initialized __m512i
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxorps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setzero_epi32() -> __m512i {
    // All-0 is a properly initialized __m512i
    unsafe { const { mem::zeroed() } }
}

/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
/// order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
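///
/// # Examples
///
/// An illustrative sketch: with `setr`, the first argument lands in the
/// lowest lane (guarded so it only executes where `avx512f` is available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let v = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let lanes: [i32; 16] = std::mem::transmute(v);
///             assert_eq!(lanes, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
///         }
///     }
/// }
/// ```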
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr_epi32(
    e15: i32,
    e14: i32,
    e13: i32,
    e12: i32,
    e11: i32,
    e10: i32,
    e9: i32,
    e8: i32,
    e7: i32,
    e6: i32,
    e5: i32,
    e4: i32,
    e3: i32,
    e2: i32,
    e1: i32,
    e0: i32,
) -> __m512i {
    unsafe {
        let r = i32x16::new(
            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
        );
        transmute(r)
    }
}

/// Set packed 8-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
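///
/// # Examples
///
/// An illustrative sketch: with `set` (no `r`), the first argument becomes the
/// highest-index lane (guarded so it only executes where `avx512f` is available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let v = _mm512_set_epi8(
///                 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48,
///                 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32,
///                 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
///                 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
///             );
///             let bytes: [i8; 64] = std::mem::transmute(v);
///             assert_eq!(bytes[0], 0);
///             assert_eq!(bytes[63], 63);
///         }
///     }
/// }
/// ```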
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi8(
    e63: i8,
    e62: i8,
    e61: i8,
    e60: i8,
    e59: i8,
    e58: i8,
    e57: i8,
    e56: i8,
    e55: i8,
    e54: i8,
    e53: i8,
    e52: i8,
    e51: i8,
    e50: i8,
    e49: i8,
    e48: i8,
    e47: i8,
    e46: i8,
    e45: i8,
    e44: i8,
    e43: i8,
    e42: i8,
    e41: i8,
    e40: i8,
    e39: i8,
    e38: i8,
    e37: i8,
    e36: i8,
    e35: i8,
    e34: i8,
    e33: i8,
    e32: i8,
    e31: i8,
    e30: i8,
    e29: i8,
    e28: i8,
    e27: i8,
    e26: i8,
    e25: i8,
    e24: i8,
    e23: i8,
    e22: i8,
    e21: i8,
    e20: i8,
    e19: i8,
    e18: i8,
    e17: i8,
    e16: i8,
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m512i {
    unsafe {
        let r = i8x64::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
        );
        transmute(r)
    }
}

/// Set packed 16-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi16(
    e31: i16,
    e30: i16,
    e29: i16,
    e28: i16,
    e27: i16,
    e26: i16,
    e25: i16,
    e24: i16,
    e23: i16,
    e22: i16,
    e21: i16,
    e20: i16,
    e19: i16,
    e18: i16,
    e17: i16,
    e16: i16,
    e15: i16,
    e14: i16,
    e13: i16,
    e12: i16,
    e11: i16,
    e10: i16,
    e9: i16,
    e8: i16,
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m512i {
    unsafe {
        let r = i16x32::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
        );
        transmute(r)
    }
}

/// Set packed 32-bit integers in dst with the repeated 4-element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
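///
/// # Examples
///
/// An illustrative sketch (guarded so it only executes where `avx512f` is
/// available): the four arguments repeat through all 16 lanes, with the last
/// argument in the lowest lane of each group.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let v = _mm512_set4_epi32(4, 3, 2, 1);
///             let lanes: [i32; 16] = std::mem::transmute(v);
///             assert_eq!(lanes[..4], [1, 2, 3, 4]);
///             assert_eq!(lanes[..4], lanes[4..8]);
///         }
///     }
/// }
/// ```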
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}

/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4-element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4-element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(d, c, b, a, d, c, b, a)
}

/// Set packed 32-bit integers in dst with the repeated 4-element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
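///
/// # Examples
///
/// An illustrative sketch (guarded so it only executes where `avx512f` is
/// available): unlike `_mm512_set4_epi32`, the first argument lands in the
/// lowest lane of each 4-element group.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let v = _mm512_setr4_epi32(1, 2, 3, 4);
///             let lanes: [i32; 16] = std::mem::transmute(v);
///             assert_eq!(lanes[..4], [1, 2, 3, 4]);
///         }
///     }
/// }
/// ```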
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}

/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4-element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4-element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(a, b, c, d, a, b, c, d)
}

/// Set packed 64-bit integers in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
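///
/// # Examples
///
/// An illustrative sketch (guarded so it only executes where `avx512f` is
/// available): the first argument becomes the highest-index lane.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let v = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
///             let lanes: [i64; 8] = std::mem::transmute(v);
///             assert_eq!(lanes, [0, 1, 2, 3, 4, 5, 6, 7]);
///         }
///     }
/// }
/// ```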
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi64(
    e0: i64,
    e1: i64,
    e2: i64,
    e3: i64,
    e4: i64,
    e5: i64,
    e6: i64,
    e7: i64,
) -> __m512i {
    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
}

/// Set packed 64-bit integers in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr_epi64(
    e0: i64,
    e1: i64,
    e2: i64,
    e3: i64,
    e4: i64,
    e5: i64,
    e6: i64,
    e7: i64,
) -> __m512i {
    unsafe {
        let r = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
    }
}

/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
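///
/// # Examples
///
/// A minimal sketch with arbitrary data (guarded so it only executes where
/// `avx512f` is available). `SCALE` is in bytes, so `8` strides over `f64`s:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let data: Vec<f64> = (0..32).map(|i| i as f64).collect();
///             let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///             let r = _mm512_i32gather_pd::<8>(idx, data.as_ptr());
///             let got: [f64; 8] = std::mem::transmute(r);
///             assert_eq!(got, [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0]);
///         }
///     }
/// }
/// ```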
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
    offsets: __m256i,
    slice: *const f64,
) -> __m512d {
    static_assert_imm8_scale!(SCALE);
    let zero = f64x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
    src: __m512d,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const f64,
) -> __m512d {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}

/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
    offsets: __m512i,
    slice: *const f64,
) -> __m512d {
    static_assert_imm8_scale!(SCALE);
    let zero = f64x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
    src: __m512d,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const f64,
) -> __m512d {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let zero = f32x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
    src: __m256,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const f32,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vgatherqps(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
    static_assert_imm8_scale!(SCALE);
    let zero = f32x16::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vgatherdps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
    src: __m512,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const f32,
) -> __m512 {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x16();
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vgatherdps(src, slice, offsets, mask as i16, SCALE);
    transmute(r)
}

/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
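///
/// # Examples
///
/// A minimal sketch with arbitrary data (guarded so it only executes where
/// `avx512f` is available). Each of the 16 indices is scaled by 4 bytes:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let data: Vec<i32> = (0..64).collect();
///             let idx = _mm512_setr_epi32(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
///             let r = _mm512_i32gather_epi32::<4>(idx, data.as_ptr());
///             let got: [i32; 16] = std::mem::transmute(r);
///             assert_eq!(got, [0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60]);
///         }
///     }
/// }
/// ```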
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i32,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let zero = i32x16::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
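///
/// # Examples
///
/// A minimal sketch with arbitrary data (guarded so it only executes where
/// `avx512f` is available): only lanes whose mask bit is set touch memory,
/// the rest are copied from `src`.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let data: Vec<i32> = (100..116).collect();
///             let src = _mm512_set1_epi32(-1);
///             let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let r = _mm512_mask_i32gather_epi32::<4>(src, 0b0101_0101_0101_0101, idx, data.as_ptr());
///             let got: [i32; 16] = std::mem::transmute(r);
///             assert_eq!(got[0], 100); // gathered
///             assert_eq!(got[1], -1); // masked off, copied from src
///         }
///     }
/// }
/// ```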
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
    src: __m512i,
    mask: __mmask16,
    offsets: __m512i,
    slice: *const i32,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x16();
    let mask = mask as i16;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x16();
    let r = vpgatherdd(src, slice, offsets, mask, SCALE);
    transmute(r)
}

/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
    offsets: __m256i,
    slice: *const i64,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
    src: __m512i,
    mask: __mmask8,
    offsets: __m256i,
    slice: *const i64,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *const i8;
    let offsets = offsets.as_i32x8();
    let r = vpgatherdq(src, slice, offsets, mask, SCALE);
    transmute(r)
}

/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i64,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let zero = i64x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
    src: __m512i,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const i64,
) -> __m512i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vpgatherqq(src, slice, offsets, mask, SCALE);
    transmute(r)
}

/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
    offsets: __m512i,
    slice: *const i32,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let zeros = i32x8::ZERO;
    let neg_one = -1;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
    transmute(r)
}

/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
    src: __m256i,
    mask: __mmask8,
    offsets: __m512i,
    slice: *const i32,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask as i8;
    let slice = slice as *const i8;
    let offsets = offsets.as_i64x8();
    let r = vpgatherqd(src, slice, offsets, mask, SCALE);
    transmute(r)
}

/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
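///
/// # Examples
///
/// A minimal sketch with arbitrary data (guarded so it only executes where
/// `avx512f` is available): eight doubles are stored at strided positions.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let mut buf = [0.0f64; 16];
///             let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///             let vals = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///             _mm512_i32scatter_pd::<8>(buf.as_mut_ptr(), idx, vals);
///             assert_eq!(buf[0], 1.0);
///             assert_eq!(buf[2], 2.0);
///             assert_eq!(buf[1], 0.0); // untouched
///         }
///     }
/// }
/// ```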
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
    slice: *mut f64,
    offsets: __m256i,
    src: __m512d,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    vscatterdpd(slice, neg_one, offsets, src, SCALE);
}

/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
    slice: *mut f64,
    mask: __mmask8,
    offsets: __m256i,
    src: __m512d,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    vscatterdpd(slice, mask as i8, offsets, src, SCALE);
}

/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
    slice: *mut f64,
    offsets: __m512i,
    src: __m512d,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vscatterqpd(slice, neg_one, offsets, src, SCALE);
}

/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
    slice: *mut f64,
    mask: __mmask8,
    offsets: __m512i,
    src: __m512d,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f64x8();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vscatterqpd(slice, mask as i8, offsets, src, SCALE);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
    slice: *mut f32,
    offsets: __m512i,
    src: __m512,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x16();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    vscatterdps(slice, neg_one, offsets, src, SCALE);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
    slice: *mut f32,
    mask: __mmask16,
    offsets: __m512i,
    src: __m512,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x16();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    vscatterdps(slice, mask as i16, offsets, src, SCALE);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
    slice: *mut f32,
    offsets: __m512i,
    src: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vscatterqps(slice, neg_one, offsets, src, SCALE);
}

/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
    slice: *mut f32,
    mask: __mmask8,
    offsets: __m512i,
    src: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_f32x8();
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vscatterqps(slice, mask as i8, offsets, src, SCALE);
}

/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
    slice: *mut i64,
    offsets: __m256i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    vpscatterdq(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
    slice: *mut i64,
    mask: __mmask8,
    offsets: __m256i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x8();
    vpscatterdq(slice, mask, offsets, src, SCALE);
}

/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
    slice: *mut i64,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vpscatterqq(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
    slice: *mut i64,
    mask: __mmask8,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i64x8();
    let mask = mask as i8;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vpscatterqq(slice, mask, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
    slice: *mut i32,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x16();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    vpscatterdd(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
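///
/// # Examples
///
/// A minimal sketch with arbitrary data (guarded so it only executes where
/// `avx512f` is available): only the four lanes enabled by the mask are stored.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let mut buf = [0i32; 16];
///             let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             let vals = _mm512_set1_epi32(7);
///             _mm512_mask_i32scatter_epi32::<4>(buf.as_mut_ptr(), 0b0000_0000_0000_1111, idx, vals);
///             assert_eq!(buf[3], 7); // stored
///             assert_eq!(buf[4], 0); // masked off
///         }
///     }
/// }
/// ```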
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
    slice: *mut i32,
    mask: __mmask16,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x16();
    let mask = mask as i16;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i32x16();
    vpscatterdd(slice, mask, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
    slice: *mut i32,
    offsets: __m512i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let neg_one = -1;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vpscatterqd(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
    slice: *mut i32,
    mask: __mmask8,
    offsets: __m512i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src = src.as_i32x8();
    let mask = mask as i8;
    let slice = slice as *mut i8;
    let offsets = offsets.as_i64x8();
    vpscatterqd(slice, mask, offsets, src, SCALE);
}

/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
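///
/// # Examples
///
/// A minimal sketch with arbitrary data (guarded so it only executes where
/// `avx512f` is available): only the eight indices in the lower half of
/// `vindex` are used.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let data: Vec<i64> = (0..16).collect();
///             // The upper eight i32 lanes are ignored by this intrinsic.
///             let vindex = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0);
///             let r = _mm512_i32logather_epi64::<8>(vindex, data.as_ptr());
///             let got: [i64; 8] = std::mem::transmute(r);
///             assert_eq!(got, [0, 2, 4, 6, 8, 10, 12, 14]);
///         }
///     }
/// }
/// ```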
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
    vindex: __m512i,
    base_addr: *const i64,
) -> __m512i {
    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
}

/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
    src: __m512i,
    k: __mmask8,
    vindex: __m512i,
    base_addr: *const i64,
) -> __m512i {
    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
}

/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
    vindex: __m512i,
    base_addr: *const f64,
) -> __m512d {
    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
}

/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
    src: __m512d,
    k: __mmask8,
    vindex: __m512i,
    base_addr: *const f64,
) -> __m512d {
    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
}

/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    vindex: __m512i,
    a: __m512i,
) {
    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m512i,
    a: __m512i,
) {
    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m512i,
    a: __m512d,
) {
    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
/// (elements whose corresponding mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m512i,
    a: __m512d,
) {
    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
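///
/// # Examples
///
/// A minimal sketch with arbitrary data (guarded so it only executes where
/// both required features are available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let mut buf = [0i32; 16];
///             let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///             let vals = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
///             _mm256_i32scatter_epi32::<4>(buf.as_mut_ptr(), idx, vals);
///             assert_eq!(buf[2], 2);
///             assert_eq!(buf[1], 0); // untouched
///         }
///     }
/// }
/// ```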
17807#[inline]
17808#[target_feature(enable = "avx512f,avx512vl")]
17809#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17810#[rustc_legacy_const_generics(3)]
17811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17812pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17813    base_addr: *mut i32,
17814    vindex: __m256i,
17815    a: __m256i,
17816) {
17817    static_assert_imm8_scale!(SCALE);
17818    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17819}
17820
17821/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17822/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17823/// are not written to memory).
17824///
17825/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17826#[inline]
17827#[target_feature(enable = "avx512f,avx512vl")]
17828#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17829#[rustc_legacy_const_generics(4)]
17830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17831pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17832    base_addr: *mut i32,
17833    k: __mmask8,
17834    vindex: __m256i,
17835    a: __m256i,
17836) {
17837    static_assert_imm8_scale!(SCALE);
17838    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17839}
17840
/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale. scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    vindex: __m128i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m128i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m128i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m128i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
}

/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    vindex: __m256i,
    a: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
}

/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    k: __mmask8,
    vindex: __m256i,
    a: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    vindex: __m256i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    k: __mmask8,
    vindex: __m256i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m256i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m256i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    vindex: __m256i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    k: __mmask8,
    vindex: __m256i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
}

/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const i32,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_256(
        src.as_i32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
}

/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const i64,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const f64,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const f32,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_256(
        src.as_f32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
}

/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const i32,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_256(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

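
// Illustrative usage sketch; `i64scatter_pd_demo` and its data are assumptions
// for this example. The qword-index form takes four 64-bit indices in a
// __m256i, one per f64 lane of `a`; SCALE = 8 makes each index an f64 element
// offset.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn i64scatter_pd_demo(buf: &mut [f64; 4]) {
    let vindex = _mm256_setr_epi64x(3, 2, 1, 0);
    let a = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
    // Stores a reversed copy: buf becomes [3.0, 2.0, 1.0, 0.0].
    _mm256_i64scatter_pd::<8>(buf.as_mut_ptr(), vindex, a);
}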
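
// Usage sketch (hypothetical helper, assumed table size and mask): lanes whose
// mask bit is clear keep the corresponding lane of `src`, so `src` doubles as
// a per-lane fallback value for the gather.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn masked_gather_i32_demo(table: &[i32; 16]) -> __m256i {
    let fallback = _mm256_set1_epi32(-1);
    let vindex = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    // The lower four lanes are gathered from `table`; the upper four stay -1.
    _mm256_mmask_i32gather_epi32::<4>(fallback, 0b0000_1111, vindex, table.as_ptr())
}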
/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const i64,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const f64,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const f32,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_256(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut f32,
    k: __mmask8,
    vindex: __m128i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
}

/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
}

/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut i32,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut i64,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut f64,
    k: __mmask8,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
}

/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}

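
// Sketch (hypothetical helper and data): with 64-bit indices the result
// narrows to __m128, one f32 lane per qword index; masked-off lanes come
// from `src`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn masked_gather_ps_demo(table: &[f32; 8]) -> __m128 {
    let src = _mm_set1_ps(0.0);
    let vindex = _mm256_setr_epi64x(1, 3, 5, 7);
    // SCALE = 4: each qword index is an f32 element offset into `table`.
    _mm256_mmask_i64gather_ps::<4>(src, 0b0000_1111, vindex, table.as_ptr())
}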
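
// Sketch (hypothetical helper and data): only the two low 32-bit indices of
// `vindex` are used, one per i64 lane of `a`; the two high index lanes are
// ignored.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn scatter2_epi64_demo(buf: &mut [i64; 4]) {
    let vindex = _mm_setr_epi32(2, 0, 7, 7); // index lanes 2..3 are unused
    let a = _mm_set_epi64x(20, 10); // lane 0 = 10, lane 1 = 20
    // SCALE = 8: writes buf[2] = 10 and buf[0] = 20.
    _mm_i32scatter_epi64::<8>(buf.as_mut_ptr(), vindex, a);
}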
/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
18596/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18597#[inline]
18598#[target_feature(enable = "avx512f,avx512vl")]
18599#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18600#[rustc_legacy_const_generics(4)]
18601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18602pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18603    base_addr: *mut f32,
18604    k: __mmask8,
18605    vindex: __m128i,
18606    a: __m128,
18607) {
18608    static_assert_imm8_scale!(SCALE);
18609    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18610}
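
// Sketch (hypothetical helper and data): only the two low f32 lanes of `a`
// participate, paired with the two 64-bit indices in `vindex`; the upper
// lanes of `a` and the upper mask bits are ignored.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn scatter2_ps_demo(buf: &mut [f32; 4]) {
    let vindex = _mm_set_epi64x(3, 0); // lane 0 -> index 0, lane 1 -> index 3
    let a = _mm_setr_ps(1.0, 2.0, 99.0, 99.0); // the 99.0 lanes are never stored
    _mm_mask_i64scatter_ps::<4>(buf.as_mut_ptr(), 0b0000_0011, vindex, a);
}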
18611
18612/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18613/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18614/// mask bit is not set).
18615///
18616/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18617#[inline]
18618#[target_feature(enable = "avx512f,avx512vl")]
18619#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18620#[rustc_legacy_const_generics(4)]
18621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18622pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18623    src: __m128i,
18624    k: __mmask8,
18625    vindex: __m128i,
18626    base_addr: *const i32,
18627) -> __m128i {
18628    static_assert_imm8_scale!(SCALE);
18629    transmute(vpgatherdd_128(
18630        src.as_i32x4(),
18631        base_addr as _,
18632        vindex.as_i32x4(),
18633        k,
18634        SCALE,
18635    ))
18636}
18637
18638/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18639/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18640/// mask bit is not set).
18641///
18642/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18643#[inline]
18644#[target_feature(enable = "avx512f,avx512vl")]
18645#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18646#[rustc_legacy_const_generics(4)]
18647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18648pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18649    src: __m128i,
18650    k: __mmask8,
18651    vindex: __m128i,
18652    base_addr: *const i64,
18653) -> __m128i {
18654    static_assert_imm8_scale!(SCALE);
18655    transmute(vpgatherdq_128(
18656        src.as_i64x2(),
18657        base_addr as _,
18658        vindex.as_i32x4(),
18659        k,
18660        SCALE,
18661    ))
18662}
18663
18664/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18665/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18666/// from src when the corresponding mask bit is not set).
18667///
18668/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18669#[inline]
18670#[target_feature(enable = "avx512f,avx512vl")]
18671#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18672#[rustc_legacy_const_generics(4)]
18673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18674pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18675    src: __m128d,
18676    k: __mmask8,
18677    vindex: __m128i,
18678    base_addr: *const f64,
18679) -> __m128d {
18680    static_assert_imm8_scale!(SCALE);
18681    transmute(vgatherdpd_128(
18682        src.as_f64x2(),
18683        base_addr as _,
18684        vindex.as_i32x4(),
18685        k,
18686        SCALE,
18687    ))
18688}
18689
18690/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18691/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18692/// from src when the corresponding mask bit is not set).
18693///
18694/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18695#[inline]
18696#[target_feature(enable = "avx512f,avx512vl")]
18697#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18698#[rustc_legacy_const_generics(4)]
18699#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18700pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18701    src: __m128,
18702    k: __mmask8,
18703    vindex: __m128i,
18704    base_addr: *const f32,
18705) -> __m128 {
18706    static_assert_imm8_scale!(SCALE);
18707    transmute(vgatherdps_128(
18708        src.as_f32x4(),
18709        base_addr as _,
18710        vindex.as_i32x4(),
18711        k,
18712        SCALE,
18713    ))
18714}
18715
18716/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18717/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18718/// mask bit is not set).
18719///
18720/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18721#[inline]
18722#[target_feature(enable = "avx512f,avx512vl")]
18723#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18724#[rustc_legacy_const_generics(4)]
18725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18726pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18727    src: __m128i,
18728    k: __mmask8,
18729    vindex: __m128i,
18730    base_addr: *const i32,
18731) -> __m128i {
18732    static_assert_imm8_scale!(SCALE);
18733    transmute(vpgatherqd_128(
18734        src.as_i32x4(),
18735        base_addr as _,
18736        vindex.as_i64x2(),
18737        k,
18738        SCALE,
18739    ))
18740}
18741
18742/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18743/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18744/// mask bit is not set).
18745///
18746/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18747#[inline]
18748#[target_feature(enable = "avx512f,avx512vl")]
18749#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18750#[rustc_legacy_const_generics(4)]
18751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18752pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18753    src: __m128i,
18754    k: __mmask8,
18755    vindex: __m128i,
18756    base_addr: *const i64,
18757) -> __m128i {
18758    static_assert_imm8_scale!(SCALE);
18759    transmute(vpgatherqq_128(
18760        src.as_i64x2(),
18761        base_addr as _,
18762        vindex.as_i64x2(),
18763        k,
18764        SCALE,
18765    ))
18766}
18767
18768/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18769/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18770/// from src when the corresponding mask bit is not set).
18771///
18772/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
18773#[inline]
18774#[target_feature(enable = "avx512f,avx512vl")]
18775#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18776#[rustc_legacy_const_generics(4)]
18777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18778pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18779    src: __m128d,
18780    k: __mmask8,
18781    vindex: __m128i,
18782    base_addr: *const f64,
18783) -> __m128d {
18784    static_assert_imm8_scale!(SCALE);
18785    transmute(vgatherqpd_128(
18786        src.as_f64x2(),
18787        base_addr as _,
18788        vindex.as_i64x2(),
18789        k,
18790        SCALE,
18791    ))
18792}
18793
18794/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18795/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18796/// from src when the corresponding mask bit is not set).
18797///
18798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18799#[inline]
18800#[target_feature(enable = "avx512f,avx512vl")]
18801#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18802#[rustc_legacy_const_generics(4)]
18803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18804pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18805    src: __m128,
18806    k: __mmask8,
18807    vindex: __m128i,
18808    base_addr: *const f32,
18809) -> __m128 {
18810    static_assert_imm8_scale!(SCALE);
18811    transmute(vgatherqps_128(
18812        src.as_f32x4(),
18813        base_addr as _,
18814        vindex.as_i64x2(),
18815        k,
18816        SCALE,
18817    ))
18818}
18819
18820/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18821///
18822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
18823#[inline]
18824#[target_feature(enable = "avx512f")]
18825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18826#[cfg_attr(test, assert_instr(vpcompressd))]
18827pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18828    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
18829}
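
// Illustrative sketch (hypothetical helper; the mask value is an assumption):
// compress left-packs the selected lanes. With k selecting the even lanes,
// the eight selected values end up contiguously in lanes 0..7 of the result,
// and lanes 8..15 are passed through from `src`.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn compress_demo(a: __m512i, src: __m512i) -> __m512i {
    _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a)
}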
18830
18831/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18832///
18833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18834#[inline]
18835#[target_feature(enable = "avx512f")]
18836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18837#[cfg_attr(test, assert_instr(vpcompressd))]
18838pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18839    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
18840}
18841
18842/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18843///
18844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18845#[inline]
18846#[target_feature(enable = "avx512f,avx512vl")]
18847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18848#[cfg_attr(test, assert_instr(vpcompressd))]
18849pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18850    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18851}
18852
18853/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18854///
18855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18856#[inline]
18857#[target_feature(enable = "avx512f,avx512vl")]
18858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18859#[cfg_attr(test, assert_instr(vpcompressd))]
18860pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
18861    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18862}
18863
18864/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18865///
18866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18867#[inline]
18868#[target_feature(enable = "avx512f,avx512vl")]
18869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18870#[cfg_attr(test, assert_instr(vpcompressd))]
18871pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18872    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18873}
18874
18875/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18876///
18877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18878#[inline]
18879#[target_feature(enable = "avx512f,avx512vl")]
18880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18881#[cfg_attr(test, assert_instr(vpcompressd))]
18882pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
18883    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18884}
18885
18886/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18887///
18888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18889#[inline]
18890#[target_feature(enable = "avx512f")]
18891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18892#[cfg_attr(test, assert_instr(vpcompressq))]
18893pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
18894    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18895}
18896
18897/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18898///
18899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18900#[inline]
18901#[target_feature(enable = "avx512f")]
18902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18903#[cfg_attr(test, assert_instr(vpcompressq))]
18904pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
18905    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18906}
18907
18908/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18909///
18910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18911#[inline]
18912#[target_feature(enable = "avx512f,avx512vl")]
18913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18914#[cfg_attr(test, assert_instr(vpcompressq))]
18915pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
18916    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18917}
18918
18919/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18920///
18921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18922#[inline]
18923#[target_feature(enable = "avx512f,avx512vl")]
18924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18925#[cfg_attr(test, assert_instr(vpcompressq))]
18926pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
18927    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18928}
18929
18930/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18931///
18932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18933#[inline]
18934#[target_feature(enable = "avx512f,avx512vl")]
18935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18936#[cfg_attr(test, assert_instr(vpcompressq))]
18937pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
18938    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18939}
18940
18941/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18942///
18943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18944#[inline]
18945#[target_feature(enable = "avx512f,avx512vl")]
18946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18947#[cfg_attr(test, assert_instr(vpcompressq))]
18948pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
18949    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18950}
18951
18952/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18953///
18954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18955#[inline]
18956#[target_feature(enable = "avx512f")]
18957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18958#[cfg_attr(test, assert_instr(vcompressps))]
18959pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
18960    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18961}
18962
18963/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18964///
18965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18966#[inline]
18967#[target_feature(enable = "avx512f")]
18968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18969#[cfg_attr(test, assert_instr(vcompressps))]
18970pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
18971    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18972}
18973
18974/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18975///
18976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18977#[inline]
18978#[target_feature(enable = "avx512f,avx512vl")]
18979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18980#[cfg_attr(test, assert_instr(vcompressps))]
18981pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
18982    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18983}
18984
18985/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18986///
18987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18988#[inline]
18989#[target_feature(enable = "avx512f,avx512vl")]
18990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18991#[cfg_attr(test, assert_instr(vcompressps))]
18992pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
18993    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18994}
18995
18996/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18997///
18998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18999#[inline]
19000#[target_feature(enable = "avx512f,avx512vl")]
19001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19002#[cfg_attr(test, assert_instr(vcompressps))]
19003pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
19004    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
19005}
19006
19007/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
19008///
19009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
19010#[inline]
19011#[target_feature(enable = "avx512f,avx512vl")]
19012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19013#[cfg_attr(test, assert_instr(vcompressps))]
19014pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
19015    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
19016}
19017
/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

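// A minimal sketch of the write-masked variant, not part of the stdarch API
// (`compress_pd_demo` and the constants are illustrative): the four active
// lanes are packed into the low elements, while the untouched upper elements
// come from `src` instead of being zeroed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn compress_pd_demo() {
    let src = _mm512_set1_pd(9.0);
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    // popcount(k) == 4, so elements 0..4 hold the compressed values and
    // elements 4..8 are passed through from src.
    let r = _mm512_mask_compress_pd(src, 0b0000_1111, a);
    let r: [f64; 8] = unsafe { mem::transmute(r) };
    assert_eq!(r, [0.0, 1.0, 2.0, 3.0, 9.0, 9.0, 9.0, 9.0]);
}
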
/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
}

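// A minimal sketch of the classic "filter into a buffer" pattern, not part of
// the stdarch API (`compressstoreu_epi32_demo` and the test values are
// illustrative): exactly popcount(k) elements are written, contiguously from
// `base_addr`, and the rest of the destination is left untouched.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn compressstoreu_epi32_demo() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let mut buf = [-1i32; 16];
    // Mask bits 0..4 and 12..16 are set: eight elements are stored in order.
    unsafe { _mm512_mask_compressstoreu_epi32(buf.as_mut_ptr(), 0b1111_0000_0000_1111, a) };
    assert_eq!(buf[..8], [0, 1, 2, 3, 12, 13, 14, 15]);
    assert_eq!(buf[8..], [-1; 8]);
}
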
/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
}

/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
}

/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
}

/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
}

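// The floating-point stores behave identically to the integer ones; a minimal
// sketch, not part of the stdarch API (`compressstoreu_pd_demo` and its
// values are illustrative):
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn compressstoreu_pd_demo() {
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let mut buf = [0.0f64; 8];
    // Bits 1, 5 and 7 of the mask are set, so exactly three elements land in
    // buf[0..3]; buf[3..] keeps its previous contents.
    unsafe { _mm512_mask_compressstoreu_pd(buf.as_mut_ptr(), 0b1010_0010, a) };
    assert_eq!(buf[..3], [1.0, 5.0, 7.0]);
}
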
/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
}

/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
}

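// Expand is the inverse of compress; a minimal sketch, not part of the
// stdarch API (`expand_epi32_demo` is an illustrative name). The first
// popcount(k) elements of `a` are read contiguously and scattered to the
// lanes whose mask bit is set; the zeromask variant zeroes the other lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn expand_epi32_demo() {
    let a = _mm_setr_epi32(10, 20, 30, 40);
    // Bits 0 and 2 are set: a[0] goes to lane 0 and a[1] goes to lane 2.
    let r = _mm_maskz_expand_epi32(0b0101, a);
    let r: [i32; 4] = unsafe { mem::transmute(r) };
    assert_eq!(r, [10, 0, 20, 0]);
}
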
/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
}

/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
}

/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
}

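// A minimal sketch of the write-masked expand, not part of the stdarch API
// (`expand_ps_demo` is an illustrative name): four contiguous elements of `a`
// land in the four masked lanes, and every other lane is copied from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn expand_ps_demo() {
    let src = _mm512_set1_ps(-1.0);
    let a = _mm512_setr_ps(
        1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
    );
    // Mask bits 0, 5, 10 and 15 are set, so a[0..4] spread to those lanes.
    let r = _mm512_mask_expand_ps(src, 0b1000_0100_0010_0001, a);
    let r: [f32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(r[0], 1.0);
    assert_eq!(r[5], 2.0);
    assert_eq!(r[10], 3.0);
    assert_eq!(r[15], 4.0);
    assert_eq!(r[1], -1.0);
}
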
/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
}

/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_rolv_epi32(a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rol_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_rolv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_maskz_rolv_epi32(k, a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_rolv_epi32(a, _mm256_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rol_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_rolv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_maskz_rolv_epi32(k, a, _mm256_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_rolv_epi32(a, _mm_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_rolv_epi32(src, k, a, _mm_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_maskz_rolv_epi32(k, a, _mm_set1_epi32(IMM8))
}

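// A minimal sketch, not part of the stdarch API (`rol_epi32_demo` is an
// illustrative name): rotating each 32-bit lane left by 8 wraps the top byte
// around to the bottom.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn rol_epi32_demo() {
    let a = _mm_set1_epi32(0x12345678);
    let r = _mm_rol_epi32::<8>(a);
    let r: [i32; 4] = unsafe { mem::transmute(r) };
    assert_eq!(r, [0x34567812; 4]);
}
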
/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_rorv_epi32(a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_ror_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_rorv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_maskz_rorv_epi32(k, a, _mm512_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_rorv_epi32(a, _mm256_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_ror_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_rorv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_maskz_rorv_epi32(k, a, _mm256_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_rorv_epi32(a, _mm_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_rorv_epi32(src, k, a, _mm_set1_epi32(IMM8))
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprord, IMM8 = 123))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_maskz_rorv_epi32(k, a, _mm_set1_epi32(IMM8))
}

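// The right-rotate counterpart; a minimal sketch with an illustrative helper
// name (`ror_epi32_demo`): rotating right by 8 moves the low byte of each
// lane to the top.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn ror_epi32_demo() {
    let a = _mm512_set1_epi32(0x12345678);
    let r = _mm512_ror_epi32::<8>(a);
    let r: [i32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(r, [0x78123456; 16]);
}
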
/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_rolv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rol_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_rolv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_maskz_rolv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_rolv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rol_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_rolv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_maskz_rolv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_rolv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_rolv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_maskz_rolv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
}

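// A minimal sketch for the 64-bit left rotate, with an illustrative helper
// name (`rol_epi64_demo`): rotating left by 4 wraps the top nibble of each
// lane around.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn rol_epi64_demo() {
    let a = _mm256_set1_epi64x(0x0123_4567_89AB_CDEF);
    let r = _mm256_rol_epi64::<4>(a);
    let r: [i64; 4] = unsafe { mem::transmute(r) };
    assert_eq!(r, [0x1234_5678_9ABC_DEF0; 4]);
}
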
/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_rorv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_ror_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_rorv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm512_maskz_rorv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_rorv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_ror_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_rorv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm256_maskz_rorv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_rorv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_rorv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorq, IMM8 = 15))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    _mm_maskz_rorv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
}

20016/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
20017///
20018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
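///
/// # Examples
///
/// A short sketch, assuming AVX-512F availability was checked beforehand;
/// it also shows the documented zeroing for shift counts of 32 or more:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(1);
/// let r = _mm512_slli_epi32::<3>(a); // every lane becomes 1 << 3 == 8
/// let z = _mm512_slli_epi32::<32>(a); // counts of 32 or more zero the lanes
/// ```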
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
        }
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
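///
/// # Examples
///
/// A sketch of the write-masking behavior (hypothetical values; assumes
/// AVX-512F support was verified at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let src = _mm512_set1_epi32(-1);
/// let a = _mm512_set1_epi32(1);
/// // Even-indexed lanes receive 1 << 4 == 16; odd lanes keep -1 from `src`.
/// let r = _mm512_mask_slli_epi32::<4>(src, 0b0101_0101_0101_0101, a);
/// ```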
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_slli_epi32<const IMM8: u32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 32 {
            u32x16::ZERO
        } else {
            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_slli_epi32<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 32 {
            u32x8::ZERO
        } else {
            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            let r = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_slli_epi32<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 32 {
            u32x4::ZERO
        } else {
            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
    }
}

/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            let r = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
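///
/// # Examples
///
/// A minimal sketch, assuming AVX-512F is available; it highlights that the
/// shift is logical (zero-filling) even for negative lane values:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(-1); // all bits set in every lane
/// let r = _mm512_srli_epi32::<31>(a); // logical shift: every lane becomes 1
/// ```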
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
        }
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srli_epi32<const IMM8: u32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 32 {
            u32x16::ZERO
        } else {
            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srli_epi32<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 32 {
            u32x8::ZERO
        } else {
            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm256_setzero_si256()
        } else {
            let r = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srli_epi32<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 32 {
            u32x4::ZERO
        } else {
            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 32 {
            _mm_setzero_si128()
        } else {
            let r = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
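///
/// # Examples
///
/// A brief sketch under the assumption that AVX-512F has been detected at
/// runtime (constants are illustrative):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi64(3);
/// let r = _mm512_slli_epi64::<2>(a); // every lane becomes 3 << 2 == 12
/// let z = _mm512_slli_epi64::<64>(a); // counts of 64 or more zero the lanes
/// ```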
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
        }
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_slli_epi64<const IMM8: u32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 64 {
            u64x8::ZERO
        } else {
            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_slli_epi64<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 64 {
            u64x4::ZERO
        } else {
            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            let r = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_slli_epi64<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 64 {
            u64x2::ZERO
        } else {
            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
    }
}

/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            let r = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
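///
/// # Examples
///
/// A minimal sketch (illustrative values; assumes AVX-512F support has been
/// verified at runtime):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi64(-1); // all 64 bits set in every lane
/// let r = _mm512_srli_epi64::<63>(a); // logical shift: every lane becomes 1
/// ```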
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm512_setzero_si512()
        } else {
            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
        }
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srli_epi64<const IMM8: u32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = if IMM8 >= 64 {
            u64x8::ZERO
        } else {
            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm512_setzero_si512()
        } else {
            let shf = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srli_epi64<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 64 {
            u64x4::ZERO
        } else {
            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm256_setzero_si256()
        } else {
            let r = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
        }
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srli_epi64<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = if IMM8 >= 64 {
            u64x2::ZERO
        } else {
            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        if IMM8 >= 64 {
            _mm_setzero_si128()
        } else {
            let r = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
        }
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
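///
/// # Examples
///
/// A sketch showing that the shift count is taken from the low 64 bits of
/// `count` (assumes AVX-512F was detected at runtime; values are
/// illustrative):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(1);
/// let count = _mm_set_epi64x(0, 3); // low 64 bits hold the count
/// let r = _mm512_sll_epi32(a, count); // every lane becomes 1 << 3 == 8
/// ```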
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpslld))]
pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
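///
/// # Examples
///
/// A minimal sketch, assuming AVX-512F availability (values illustrative):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(16);
/// let count = _mm_set_epi64x(0, 4);
/// let r = _mm512_srl_epi32(a, count); // every lane becomes 16 >> 4 == 1
/// ```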
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
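///
/// # Examples
///
/// A short sketch under the assumption that AVX-512F is present (constants
/// are illustrative):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi64(1);
/// let count = _mm_set_epi64x(0, 40);
/// let r = _mm512_sll_epi64(a, count); // every lane becomes 1 << 40
/// ```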
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
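///
/// # Examples
///
/// A minimal sketch (assumes AVX-512F support; constants are illustrative):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi64(1 << 40);
/// let count = _mm_set_epi64x(0, 40);
/// let r = _mm512_srl_epi64(a, count); // every lane becomes 1
/// ```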
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
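///
/// # Examples
///
/// A sketch contrasting the arithmetic shift with the logical `srl`
/// variant, assuming AVX-512F has been detected (values are illustrative):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm512_set1_epi32(-8);
/// let count = _mm_set_epi64x(0, 2);
/// let r = _mm512_sra_epi32(a, count); // sign bits shift in: every lane is -2
/// ```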
21031#[inline]
21032#[target_feature(enable = "avx512f")]
21033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21034#[cfg_attr(test, assert_instr(vpsrad))]
21035pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
21036    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
21037}
21038
/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
}

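// A usage sketch (not from the original source; `sra64_demo` is a
// hypothetical helper): 64-bit arithmetic right shifts have no SSE2/AVX2
// counterpart, so `vpsraq` is only reachable through these AVX-512
// intrinsics. Assumes `avx512f` support.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn sra64_demo() -> __m512i {
//     let a = _mm512_set1_epi64(i64::MIN);
//     let count = _mm_set_epi64x(0, 63);
//     _mm512_sra_epi64(a, count) // sign fill: every lane becomes -1
// }
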
/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
    unsafe {
        let shf = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
    unsafe {
        let shf = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
    }
}

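// A usage sketch (not from the original source; `srai_demo` is a hypothetical
// helper): the shift amount is a const generic, and counts above 31 are
// clamped to 31, which matches the hardware's sign-fill behaviour.
// Assumes `avx512f` support.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn srai_demo() -> __m512i {
//     let a = _mm512_set1_epi32(-256);
//     _mm512_srai_epi32::<4>(a) // every lane becomes -256 >> 4 = -16
// }
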
/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srai_epi32<const IMM8: u32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srai_epi32<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srai_epi32<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
    }
}

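// A usage sketch (not from the original source; `srai64_demo` is a
// hypothetical helper): the 64-bit immediate variant clamps the count at 63
// rather than 31. Assumes `avx512f` support.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn srai64_demo() -> __m512i {
//     let a = _mm512_set1_epi64(-1024);
//     _mm512_srai_epi64::<10>(a) // every lane becomes -1024 >> 10 = -1
// }
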
/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srai_epi64<const IMM8: u32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srai_epi64<const IMM8: u32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srai_epi64<const IMM8: u32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u32x16();
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, transmute(count), i32x16::splat(31));
        simd_shr(a.as_i32x16(), count).as_m512i()
    }
}

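// A usage sketch (not from the original source; `srav_demo` is a hypothetical
// helper): each lane is shifted by its own count, and any count >= 32
// degenerates to a full sign fill (0 for non-negative lanes, -1 for negative
// ones), which is what the clamp to 31 above implements. Assumes `avx512f`
// support.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn srav_demo() -> __m512i {
//     let a = _mm512_set1_epi32(-64);
//     let counts = _mm512_set1_epi32(100); // out-of-range count
//     _mm512_srav_epi32(a, counts) // every lane becomes -1
// }
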
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srav_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srav_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u64x8();
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, transmute(count), i64x8::splat(63));
        simd_shr(a.as_i64x8(), count).as_m512i()
    }
}

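// A usage sketch (not from the original source; `srav64_demo` is a
// hypothetical helper): per-lane 64-bit arithmetic shifts, with out-of-range
// counts again degenerating to a sign fill. Assumes `avx512f` support.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn srav64_demo() -> __m512i {
//     let a = _mm512_set1_epi64(-4096);
//     let counts = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
//     _mm512_srav_epi64(a, counts) // lane i becomes -4096 >> i
// }
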
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srav_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, transmute(count), i64x4::splat(63));
        simd_shr(a.as_i64x4(), count).as_m256i()
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srav_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u64x2();
        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, transmute(count), i64x2::splat(63));
        simd_shr(a.as_i64x2(), count).as_m128i()
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u32x16(),
            a.as_u32x16(),
            simd_and(b.as_u32x16(), u32x16::splat(31)),
        ))
    }
}

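// A usage sketch (not from the original source; `rolv_demo` is a hypothetical
// helper): only the low 5 bits of each count take part in the rotation (the
// `& 31` above), so a count of 33 rotates by 1. Assumes `avx512f` support.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn rolv_demo() -> __m512i {
//     let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
//     let counts = _mm512_set1_epi32(33); // effectively a rotate by 1
//     _mm512_rolv_epi32(a, counts) // every lane becomes 0x0000_0003
// }
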
/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u32x8(),
            a.as_u32x8(),
            simd_and(b.as_u32x8(), u32x8::splat(31)),
        ))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u32x4(),
            a.as_u32x4(),
            simd_and(b.as_u32x4(), u32x4::splat(31)),
        ))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
    }
}

/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u32x16(),
            a.as_u32x16(),
            simd_and(b.as_u32x16(), u32x16::splat(31)),
        ))
    }
}

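// A usage sketch (not from the original source; `rorv_demo` is a hypothetical
// helper): rotate-right mirrors rotate-left, so rotating right by n is the
// same as rotating left by 32 - n. Assumes `avx512f` support.
//
// #[target_feature(enable = "avx512f")]
// unsafe fn rorv_demo() -> __m512i {
//     let a = _mm512_set1_epi32(0x0000_0003);
//     let counts = _mm512_set1_epi32(1);
//     _mm512_rorv_epi32(a, counts) // every lane becomes 0x8000_0001
// }
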
21958/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21959///
21960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21961#[inline]
21962#[target_feature(enable = "avx512f")]
21963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21964#[cfg_attr(test, assert_instr(vprorvd))]
21965#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21966pub const fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21967    unsafe {
21968        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21969        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21970    }
21971}
21972
21973/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21974///
21975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21976#[inline]
21977#[target_feature(enable = "avx512f")]
21978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21979#[cfg_attr(test, assert_instr(vprorvd))]
21980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21981pub const fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21982    unsafe {
21983        let ror = _mm512_rorv_epi32(a, b).as_i32x16();
21984        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21985    }
21986}
21987
21988/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21989///
21990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21991#[inline]
21992#[target_feature(enable = "avx512f,avx512vl")]
21993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21994#[cfg_attr(test, assert_instr(vprorvd))]
21995#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
21996pub const fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
21997    unsafe {
21998        transmute(simd_funnel_shr(
21999            a.as_u32x8(),
22000            a.as_u32x8(),
22001            simd_and(b.as_u32x8(), u32x8::splat(31)),
22002        ))
22003    }
22004}
22005
22006/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22007///
22008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
22009#[inline]
22010#[target_feature(enable = "avx512f,avx512vl")]
22011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22012#[cfg_attr(test, assert_instr(vprorvd))]
22013#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22014pub const fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22015    unsafe {
22016        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
22017        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
22018    }
22019}
22020
22021/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22022///
22023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
22024#[inline]
22025#[target_feature(enable = "avx512f,avx512vl")]
22026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22027#[cfg_attr(test, assert_instr(vprorvd))]
22028#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22029pub const fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
22030    unsafe {
22031        let ror = _mm256_rorv_epi32(a, b).as_i32x8();
22032        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
22033    }
22034}
22035
22036/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
22037///
22038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
22039#[inline]
22040#[target_feature(enable = "avx512f,avx512vl")]
22041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22042#[cfg_attr(test, assert_instr(vprorvd))]
22043#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22044pub const fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
22045    unsafe {
22046        transmute(simd_funnel_shr(
22047            a.as_u32x4(),
22048            a.as_u32x4(),
22049            simd_and(b.as_u32x4(), u32x4::splat(31)),
22050        ))
22051    }
22052}
22053
22054/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22055///
22056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
22057#[inline]
22058#[target_feature(enable = "avx512f,avx512vl")]
22059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22060#[cfg_attr(test, assert_instr(vprorvd))]
22061#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22062pub const fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22063    unsafe {
22064        let ror = _mm_rorv_epi32(a, b).as_i32x4();
22065        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
22066    }
22067}
22068
22069/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22070///
22071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
22072#[inline]
22073#[target_feature(enable = "avx512f,avx512vl")]
22074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22075#[cfg_attr(test, assert_instr(vprorvd))]
22076#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22077pub const fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
22078    unsafe {
22079        let ror = _mm_rorv_epi32(a, b).as_i32x4();
22080        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
22081    }
22082}
22083
22084/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
22085///
22086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u64x8(),
            a.as_u64x8(),
            simd_and(b.as_u64x8(), u64x8::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
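///
/// A sketch of the writemask behavior (illustrative values, not from Intel's
/// documentation; `ignore`d since it requires an AVX-512 capable CPU):
///
/// ```ignore
/// let src = _mm512_set1_epi64(-1);
/// let a = _mm512_set1_epi64(1);
/// let b = _mm512_set1_epi64(4);
/// let r = _mm512_mask_rolv_epi64(src, 0b0000_1111, a, b);
/// // lanes 0..=3 (mask bits set) hold 16; lanes 4..=7 are copied from src (-1)
/// ```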
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
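///
/// A sketch of the zeromask behavior (illustrative values, not from Intel's
/// documentation; `ignore`d since it requires an AVX-512 capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_epi64(1);
/// let b = _mm512_set1_epi64(4);
/// let r = _mm512_maskz_rolv_epi64(0b1111_0000, a, b);
/// // lanes 0..=3 (mask bits clear) are zeroed; lanes 4..=7 hold 16
/// ```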
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let rol = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u64x4(),
            a.as_u64x4(),
            simd_and(b.as_u64x4(), u64x4::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let rol = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shl(
            a.as_u64x2(),
            a.as_u64x2(),
            simd_and(b.as_u64x2(), u64x2::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprolvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let rol = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
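///
/// A minimal sketch (illustrative values, not from Intel's documentation;
/// `ignore`d since it requires an AVX-512 capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_epi64(1);
/// let b = _mm512_set1_epi64(1);
/// let r = _mm512_rorv_epi64(a, b);
/// // the low bit wraps to the top: every lane holds 1 << 63 (i64::MIN)
/// ```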
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u64x8(),
            a.as_u64x8(),
            simd_and(b.as_u64x8(), u64x8::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let ror = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u64x4(),
            a.as_u64x4(),
            simd_and(b.as_u64x4(), u64x4::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let ror = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        transmute(simd_funnel_shr(
            a.as_u64x2(),
            a.as_u64x2(),
            simd_and(b.as_u64x2(), u64x2::splat(63)),
        ))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
    }
}

/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vprorvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let ror = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
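///
/// A minimal sketch; unlike Rust's `<<` operator, an out-of-range count does
/// not wrap but zeroes the lane (illustrative values, not from Intel's
/// documentation; `ignore`d since it requires an AVX-512 capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_epi32(1);
/// let r = _mm512_sllv_epi32(a, _mm512_set1_epi32(31));
/// // every lane of r is 1 << 31 (i32::MIN)
/// let z = _mm512_sllv_epi32(a, _mm512_set1_epi32(32));
/// // counts of 32 or more shift everything out: every lane of z is 0
/// ```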
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u32x16();
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x16::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sllv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sllv_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
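///
/// A minimal sketch of the logical (zero-filling) right shift (illustrative
/// values, not from Intel's documentation; `ignore`d since it requires an
/// AVX-512 capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-16); // 0xFFFF_FFF0 in every lane
/// let r = _mm512_srlv_epi32(a, _mm512_set1_epi32(4));
/// // zeros are shifted in: every lane is 0x0FFF_FFFF, not -1
/// ```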
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u32x16();
        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
        let count = simd_select(no_overflow, count, u32x16::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srlv_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srlv_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
    }
}

/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
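///
/// A minimal sketch (illustrative values, not from Intel's documentation;
/// `ignore`d since it requires an AVX-512 capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_epi64(1);
/// let r = _mm512_sllv_epi64(a, _mm512_set1_epi64(63));
/// // every lane is 1 << 63 (i64::MIN); counts of 64 or more would yield 0
/// ```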
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u64x8();
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_sllv_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_sllv_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
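///
/// A minimal sketch of the logical (zero-filling) right shift (illustrative
/// values, not from Intel's documentation; `ignore`d since it requires an
/// AVX-512 capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // all bits set in every lane
/// let r = _mm512_srlv_epi64(a, _mm512_set1_epi64(63));
/// // zeros are shifted in: every lane is 1, not -1
/// ```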
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u64x8();
        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_srlv_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    count: __m512i,
) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let shf = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_srlv_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    count: __m256i,
) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let shf = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
    }
}

/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let shf = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
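///
/// A sketch of the control encoding: each 2-bit field of `MASK` selects one
/// of the four elements within every 128-bit lane (illustrative values, not
/// from Intel's documentation; `ignore`d since it requires an AVX-512 capable
/// CPU):
///
/// ```ignore
/// let a = _mm512_setr_ps(
///     0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
/// );
/// // MASK = 0 picks element 0 for all four positions of each lane
/// let r = _mm512_permute_ps::<0b00_00_00_00>(a);
/// // r is [0., 0., 0., 0., 4., 4., 4., 4., 8., 8., 8., 8., 12., 12., 12., 12.]
/// ```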
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                ((MASK as u32 >> 4) & 0b11),
                ((MASK as u32 >> 6) & 0b11),
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
                (MASK as u32 & 0b11) + 8,
                ((MASK as u32 >> 2) & 0b11) + 8,
                ((MASK as u32 >> 4) & 0b11) + 8,
                ((MASK as u32 >> 6) & 0b11) + 8,
                (MASK as u32 & 0b11) + 12,
                ((MASK as u32 >> 2) & 0b11) + 12,
                ((MASK as u32 >> 4) & 0b11) + 12,
                ((MASK as u32 >> 6) & 0b11) + 12,
            ],
        )
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_permute_ps<const MASK: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_permute_ps<const MASK: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
) -> __m256 {
    unsafe {
        let r = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let r = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let r = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let r = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
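///
/// A sketch of the control encoding: bit `i` of `MASK` selects the low or
/// high element of the enclosing 128-bit lane for position `i` (illustrative
/// values, not from Intel's documentation; `ignore`d since it requires an
/// AVX-512 capable CPU):
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// // 0b01 in each bit pair swaps the two elements of every 128-bit lane
/// let r = _mm512_permute_pd::<0b01_01_01_01>(a);
/// // r is [1., 0., 3., 2., 5., 4., 7., 6.]
/// ```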
22986#[inline]
22987#[target_feature(enable = "avx512f")]
22988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22989#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22990#[rustc_legacy_const_generics(1)]
22991#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
22992pub const fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
22993    unsafe {
22994        static_assert_uimm_bits!(MASK, 8);
22995        simd_shuffle!(
22996            a,
22997            a,
22998            [
22999                MASK as u32 & 0b1,
23000                ((MASK as u32 >> 1) & 0b1),
23001                ((MASK as u32 >> 2) & 0b1) + 2,
23002                ((MASK as u32 >> 3) & 0b1) + 2,
23003                ((MASK as u32 >> 4) & 0b1) + 4,
23004                ((MASK as u32 >> 5) & 0b1) + 4,
23005                ((MASK as u32 >> 6) & 0b1) + 6,
23006                ((MASK as u32 >> 7) & 0b1) + 6,
23007            ],
23008        )
23009    }
23010}
23011
23012/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23013///
23014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
23015#[inline]
23016#[target_feature(enable = "avx512f")]
23017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23018#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23019#[rustc_legacy_const_generics(3)]
23020#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23021pub const fn _mm512_mask_permute_pd<const MASK: i32>(
23022    src: __m512d,
23023    k: __mmask8,
23024    a: __m512d,
23025) -> __m512d {
23026    unsafe {
23027        static_assert_uimm_bits!(MASK, 8);
23028        let r = _mm512_permute_pd::<MASK>(a);
23029        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
23030    }
23031}
23032
23033/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23034///
23035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
23036#[inline]
23037#[target_feature(enable = "avx512f")]
23038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23039#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
23040#[rustc_legacy_const_generics(2)]
23041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
23042pub const fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
23043    unsafe {
23044        static_assert_uimm_bits!(MASK, 8);
23045        let r = _mm512_permute_pd::<MASK>(a);
23046        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
23047    }
23048}
23049
23050/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23051///
23052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_permute_pd<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 4);
        let r = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 4);
        let r = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_permute_pd<const IMM2: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let r = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM2, 2);
        let r = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
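///
/// # Example
///
/// A minimal illustrative sketch (an editorial addition, not part of Intel's
/// documentation), assuming `avx512f` is available at runtime; marked
/// `ignore` because the doctest cannot assume that feature:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0); // elements [0, 1, ..., 7]
///     // 0b00_01_10_11 reverses the four elements of each 256-bit lane.
///     let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);
///     // elements: [3, 2, 1, 0, 7, 6, 5, 4]
/// }
/// ```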
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermq
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_permutex_epi64<const MASK: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermq
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermq
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
            ],
        )
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermq
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_permutex_epi64<const MASK: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}

/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermq
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
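///
/// # Example
///
/// An illustrative sketch (editorial addition), assuming `avx512f`; marked
/// `ignore` for the same reason as the integer variant above:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///     // 0b01_00_11_10 swaps the 128-bit halves of each 256-bit lane.
///     let r = _mm512_permutex_pd::<0b01_00_11_10>(a);
///     // elements: [2.0, 3.0, 0.0, 1.0, 6.0, 7.0, 4.0, 5.0]
/// }
/// ```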
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermpd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
                (MASK as u32 & 0b11) + 4,
                ((MASK as u32 >> 2) & 0b11) + 4,
                ((MASK as u32 >> 4) & 0b11) + 4,
                ((MASK as u32 >> 6) & 0b11) + 4,
            ],
        )
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermpd
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_permutex_pd<const MASK: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermpd
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermpd
#[rustc_legacy_const_generics(1)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        simd_shuffle!(
            a,
            a,
            [
                MASK as u32 & 0b11,
                (MASK as u32 >> 2) & 0b11,
                (MASK as u32 >> 4) & 0b11,
                (MASK as u32 >> 6) & 0b11,
            ],
        )
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermpd
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_permutex_pd<const MASK: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] // should be vpermpd
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(MASK, 8);
        let r = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermd
pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm512_mask_permutevar_epi32(
    src: __m512i,
    k: __mmask16,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
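///
/// # Example
///
/// A sketch of the per-lane selection (editorial addition), assuming
/// `avx512f`; marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_ps(
///         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///     );
///     // Only the low two bits of each 32-bit control element are used, and
///     // they index within the same 128-bit lane; [3, 2, 1, 0] reverses each lane.
///     let b = _mm512_setr_epi32(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0);
///     let r = _mm512_permutevar_ps(a, b);
///     // elements: [3.0, 2.0, 1.0, 0.0, 7.0, ..., 15.0, 14.0, 13.0, 12.0]
/// }
/// ```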
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
    unsafe {
        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
    unsafe {
        let permute = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
    unsafe {
        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
    unsafe {
        let permute = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
    unsafe {
        let permute = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
    unsafe {
        let permute = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
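///
/// # Example
///
/// A sketch highlighting that selection uses *bit 1* of each 64-bit control
/// element, not bit 0 (editorial addition, assuming `avx512f`; marked
/// `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///     // A control of 2 (bit 1 set) picks the high element of the 128-bit
///     // lane; 0 picks the low one. This swaps the pair in every lane.
///     let b = _mm512_setr_epi64(2, 0, 2, 0, 2, 0, 2, 0);
///     let r = _mm512_permutevar_pd(a, b);
///     // elements: [1.0, 0.0, 3.0, 2.0, 5.0, 4.0, 7.0, 6.0]
/// }
/// ```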
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
    unsafe {
        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
    unsafe {
        let permute = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
    unsafe {
        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
    unsafe {
        let permute = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
    unsafe {
        let permute = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
    unsafe {
        let permute = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
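///
/// # Example
///
/// A sketch of a full cross-lane shuffle (editorial addition, assuming
/// `avx512f`; marked `ignore`). Note that `idx` is the first argument:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     // Indices may cross 128-bit lanes; 15 - i reverses the whole vector.
///     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
///     let r = _mm512_permutexvar_epi32(idx, a);
///     // elements: [15, 14, 13, ..., 1, 0]
/// }
/// ```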
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermd
pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm512_mask_permutexvar_epi32(
    src: __m512i,
    k: __mmask16,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermd
pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
    _mm256_permutevar8x32_epi32(a, idx) // LLVM lowers this to llvm.x86.avx2.permd
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm256_mask_permutexvar_epi32(
    src: __m256i,
    k: __mmask8,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
    }
}

/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermd))]
pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
    }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
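///
/// # Example
///
/// An editorial sketch (assuming `avx512f`; marked `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
///     // Only the low three bits of each index are used.
///     let idx = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
///     let r = _mm512_permutexvar_epi64(idx, a);
///     // elements: [17, 16, 15, 14, 13, 12, 11, 10]
/// }
/// ```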
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermq
pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermq))]
pub fn _mm512_mask_permutexvar_epi64(
    src: __m512i,
    k: __mmask8,
    idx: __m512i,
    a: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
    }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermq))]
pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
    unsafe {
        let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
    }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // should be vpermq
pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermq))]
pub fn _mm256_mask_permutexvar_epi64(
    src: __m256i,
    k: __mmask8,
    idx: __m256i,
    a: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
    }
}

/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermq))]
pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
    unsafe {
        let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
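///
/// # Example
///
/// An editorial sketch of a cross-lane broadcast (assuming `avx512f`; marked
/// `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_ps(
///         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///     );
///     // A constant index vector broadcasts one element across all lanes.
///     let idx = _mm512_set1_epi32(5);
///     let r = _mm512_permutexvar_ps(idx, a);
///     // every element is 5.0
/// }
/// ```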
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
    unsafe {
        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
    unsafe {
        let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
    _mm256_permutevar8x32_ps(a, idx) // LLVM lowers this to llvm.x86.avx2.permps
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
    unsafe {
        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
    }
}

/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermps))]
pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
    unsafe {
        let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
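///
/// # Example
///
/// An editorial sketch (assuming `avx512f`; marked `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///     // Swap the 256-bit halves of the vector.
///     let idx = _mm512_setr_epi64(4, 5, 6, 7, 0, 1, 2, 3);
///     let r = _mm512_permutexvar_pd(idx, a);
///     // elements: [4.0, 5.0, 6.0, 7.0, 0.0, 1.0, 2.0, 3.0]
/// }
/// ```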
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
    unsafe {
        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
    unsafe {
        let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
    unsafe {
        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
    }
}

/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
    unsafe {
        let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
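///
/// # Example
///
/// An editorial sketch of the two-source selection (assuming `avx512f`;
/// marked `ignore`). Bit 4 of each index chooses the source vector:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(1);
///     let b = _mm512_set1_epi32(2);
///     // Indices 0..=15 select from a, 16..=31 select from b.
///     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
///     let r = _mm512_permutex2var_epi32(a, idx, b);
///     // elements alternate: [1, 2, 1, 2, ...]
/// }
/// ```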
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2d or vpermt2d
pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub fn _mm512_mask_permutex2var_epi32(
    a: __m512i,
    k: __mmask16,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2d or vpermt2d
pub fn _mm512_maskz_permutex2var_epi32(
    k: __mmask16,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub fn _mm512_mask2_permutex2var_epi32(
    a: __m512i,
    idx: __m512i,
    k: __mmask16,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2d or vpermt2d
pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub fn _mm256_mask_permutex2var_epi32(
    a: __m256i,
    k: __mmask8,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2d or vpermt2d
pub fn _mm256_maskz_permutex2var_epi32(
    k: __mmask8,
    a: __m256i,
    idx: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub fn _mm256_mask2_permutex2var_epi32(
    a: __m256i,
    idx: __m256i,
    k: __mmask8,
    b: __m256i,
) -> __m256i {
    unsafe {
        let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2d or vpermt2d
pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2d or vpermt2d
pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
    }
}

/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
    unsafe {
        let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
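///
/// # Example
///
/// An editorial sketch (assuming `avx512f`; marked `ignore`). Here bit 3 of
/// each index chooses the source vector:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
///     // Indices 0..=7 select from a, 8..=15 select from b.
///     let idx = _mm512_setr_epi64(0, 8, 2, 10, 4, 12, 6, 14);
///     let r = _mm512_permutex2var_epi64(a, idx, b);
///     // elements: [0, 10, 2, 12, 4, 14, 6, 16]
/// }
/// ```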
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2q or vpermt2q
pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermt2q))]
pub fn _mm512_mask_permutex2var_epi64(
    a: __m512i,
    k: __mmask8,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2q or vpermt2q
pub fn _mm512_maskz_permutex2var_epi64(
    k: __mmask8,
    a: __m512i,
    idx: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpermi2q))]
pub fn _mm512_mask2_permutex2var_epi64(
    a: __m512i,
    idx: __m512i,
    k: __mmask8,
    b: __m512i,
) -> __m512i {
    unsafe {
        let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
    }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vperm))] // vpermi2q or vpermt2q
pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
}

/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
24207#[inline]
24208#[target_feature(enable = "avx512f,avx512vl")]
24209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24210#[cfg_attr(test, assert_instr(vpermt2q))]
24211pub fn _mm256_mask_permutex2var_epi64(
24212    a: __m256i,
24213    k: __mmask8,
24214    idx: __m256i,
24215    b: __m256i,
24216) -> __m256i {
24217    unsafe {
24218        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24219        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
24220    }
24221}
24222
24223/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24224///
24225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
24226#[inline]
24227#[target_feature(enable = "avx512f,avx512vl")]
24228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24229#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24230pub fn _mm256_maskz_permutex2var_epi64(
24231    k: __mmask8,
24232    a: __m256i,
24233    idx: __m256i,
24234    b: __m256i,
24235) -> __m256i {
24236    unsafe {
24237        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24238        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
24239    }
24240}
24241
24242/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24243///
24244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
24245#[inline]
24246#[target_feature(enable = "avx512f,avx512vl")]
24247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24248#[cfg_attr(test, assert_instr(vpermi2q))]
24249pub fn _mm256_mask2_permutex2var_epi64(
24250    a: __m256i,
24251    idx: __m256i,
24252    k: __mmask8,
24253    b: __m256i,
24254) -> __m256i {
24255    unsafe {
24256        let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
24257        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
24258    }
24259}
24260
24261/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24262///
24263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
24264#[inline]
24265#[target_feature(enable = "avx512f,avx512vl")]
24266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24267#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24268pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24269    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
24270}
24271
24272/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24273///
24274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
24275#[inline]
24276#[target_feature(enable = "avx512f,avx512vl")]
24277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24278#[cfg_attr(test, assert_instr(vpermt2q))]
24279pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
24280    unsafe {
24281        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24282        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
24283    }
24284}
24285
24286/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24287///
24288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
24289#[inline]
24290#[target_feature(enable = "avx512f,avx512vl")]
24291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24292#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
24293pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
24294    unsafe {
24295        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24296        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
24297    }
24298}
24299
24300/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24301///
24302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
24303#[inline]
24304#[target_feature(enable = "avx512f,avx512vl")]
24305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24306#[cfg_attr(test, assert_instr(vpermi2q))]
24307pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
24308    unsafe {
24309        let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
24310        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
24311    }
24312}
24313
24314/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24315///
24316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
24317#[inline]
24318#[target_feature(enable = "avx512f")]
24319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24320#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24321pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
24322    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
24323}
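
// Illustrative sketch (hypothetical): the float variant uses the same
// integer index encoding as the epi32 form (bits 3:0 select one of the 16
// lanes, bit 4 selects `a` vs `b`); only the element type differs. Assumes
// AVX-512F; the name is made up.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn example_mm512_permutex2var_ps() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    // Even result lanes read from a, odd result lanes from b.
    let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
    let r = _mm512_permutex2var_ps(a, idx, b);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    for (i, &v) in out.iter().enumerate() {
        assert_eq!(v, if i % 2 == 0 { 1.0 } else { 2.0 });
    }
}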
24324
24325/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24326///
24327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
24328#[inline]
24329#[target_feature(enable = "avx512f")]
24330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24331#[cfg_attr(test, assert_instr(vpermt2ps))]
24332pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
24333    unsafe {
24334        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24335        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
24336    }
24337}
24338
24339/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24340///
24341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
24342#[inline]
24343#[target_feature(enable = "avx512f")]
24344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24345#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24346pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
24347    unsafe {
24348        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24349        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
24350    }
24351}
24352
24353/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24354///
24355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
24356#[inline]
24357#[target_feature(enable = "avx512f")]
24358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24359#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
24360pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
24361    unsafe {
24362        let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
24363        let idx = _mm512_castsi512_ps(idx).as_f32x16();
24364        transmute(simd_select_bitmask(k, permute, idx))
24365    }
24366}
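
// Illustrative sketch (hypothetical): in the `mask2` form, lanes with a
// clear mask bit receive the raw bits of `idx` reinterpreted as f32. Only
// the low bits of each index participate in selection, so 0x3F80_0000 (the
// bit pattern of 1.0f32) still selects element 0 of `a`. Assumes AVX-512F;
// the name is made up.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn example_mm512_mask2_permutex2var_ps() {
    let a = _mm512_set1_ps(5.0);
    let b = _mm512_set1_ps(6.0);
    let idx = _mm512_set1_epi32(0x3F80_0000); // selects a[0]; bits encode 1.0f32
    let r = _mm512_mask2_permutex2var_ps(a, idx, 0b0000_0000_1111_1111, b);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    for i in 0..16 {
        // Low 8 mask bits set: permuted value a[0]. High 8 clear: idx bits as f32.
        assert_eq!(out[i], if i < 8 { 5.0 } else { 1.0 });
    }
}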
24367
24368/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24369///
24370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
24371#[inline]
24372#[target_feature(enable = "avx512f,avx512vl")]
24373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24374#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24375pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
24376    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
24377}
24378
24379/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24380///
24381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
24382#[inline]
24383#[target_feature(enable = "avx512f,avx512vl")]
24384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24385#[cfg_attr(test, assert_instr(vpermt2ps))]
24386pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
24387    unsafe {
24388        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
24389        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
24390    }
24391}
24392
24393/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24394///
24395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
24396#[inline]
24397#[target_feature(enable = "avx512f,avx512vl")]
24398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24399#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24400pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
24401    unsafe {
24402        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
24403        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
24404    }
24405}
24406
24407/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24408///
24409/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
24410#[inline]
24411#[target_feature(enable = "avx512f,avx512vl")]
24412#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24413#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
24414pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
24415    unsafe {
24416        let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
24417        let idx = _mm256_castsi256_ps(idx).as_f32x8();
24418        transmute(simd_select_bitmask(k, permute, idx))
24419    }
24420}
24421
24422/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24423///
24424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
24425#[inline]
24426#[target_feature(enable = "avx512f,avx512vl")]
24427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24428#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24429pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
24430    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
24431}
24432
24433/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24434///
24435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
24436#[inline]
24437#[target_feature(enable = "avx512f,avx512vl")]
24438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24439#[cfg_attr(test, assert_instr(vpermt2ps))]
24440pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
24441    unsafe {
24442        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
24443        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
24444    }
24445}
24446
24447/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24448///
24449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
24450#[inline]
24451#[target_feature(enable = "avx512f,avx512vl")]
24452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24453#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
24454pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
24455    unsafe {
24456        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
24457        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
24458    }
24459}
24460
24461/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24462///
24463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
24464#[inline]
24465#[target_feature(enable = "avx512f,avx512vl")]
24466#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24467#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
24468pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
24469    unsafe {
24470        let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
24471        let idx = _mm_castsi128_ps(idx).as_f32x4();
24472        transmute(simd_select_bitmask(k, permute, idx))
24473    }
24474}
24475
24476/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24477///
24478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
24479#[inline]
24480#[target_feature(enable = "avx512f")]
24481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24482#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24483pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
24484    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
24485}
24486
24487/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24488///
24489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
24490#[inline]
24491#[target_feature(enable = "avx512f")]
24492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24493#[cfg_attr(test, assert_instr(vpermt2pd))]
24494pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
24495    unsafe {
24496        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
24497        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
24498    }
24499}
24500
24501/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24502///
24503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
24504#[inline]
24505#[target_feature(enable = "avx512f")]
24506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24507#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24508pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
24509    unsafe {
24510        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
24511        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
24512    }
24513}
24514
24515/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24516///
24517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
24518#[inline]
24519#[target_feature(enable = "avx512f")]
24520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24521#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
24522pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
24523    unsafe {
24524        let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
24525        let idx = _mm512_castsi512_pd(idx).as_f64x8();
24526        transmute(simd_select_bitmask(k, permute, idx))
24527    }
24528}
24529
24530/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24531///
24532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
24533#[inline]
24534#[target_feature(enable = "avx512f,avx512vl")]
24535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24536#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24537pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
24538    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
24539}
24540
24541/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24542///
24543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
24544#[inline]
24545#[target_feature(enable = "avx512f,avx512vl")]
24546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24547#[cfg_attr(test, assert_instr(vpermt2pd))]
24548pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
24549    unsafe {
24550        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
24551        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
24552    }
24553}
24554
24555/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24556///
24557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
24558#[inline]
24559#[target_feature(enable = "avx512f,avx512vl")]
24560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24561#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24562pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
24563    unsafe {
24564        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
24565        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
24566    }
24567}
24568
24569/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24570///
24571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
24572#[inline]
24573#[target_feature(enable = "avx512f,avx512vl")]
24574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24575#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
24576pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
24577    unsafe {
24578        let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
24579        let idx = _mm256_castsi256_pd(idx).as_f64x4();
24580        transmute(simd_select_bitmask(k, permute, idx))
24581    }
24582}
24583
24584/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
24585///
24586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
24587#[inline]
24588#[target_feature(enable = "avx512f,avx512vl")]
24589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24590#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24591pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
24592    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
24593}
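
// Illustrative sketch (hypothetical): the smallest case has only two lanes
// per source, so index bit 0 picks the element and bit 1 picks the source.
// Assumes AVX-512F+VL; the name is made up.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_mm_permutex2var_pd() {
    let a = _mm_setr_pd(1.0, 2.0);
    let b = _mm_setr_pd(3.0, 4.0);
    let idx = _mm_set_epi64x(0, 3); // lane 0 selects b[1], lane 1 selects a[0]
    let r = _mm_permutex2var_pd(a, idx, b);
    let mut out = [0.0f64; 2];
    unsafe { _mm_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [4.0, 1.0]);
}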
24594
24595/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
24596///
24597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
24598#[inline]
24599#[target_feature(enable = "avx512f,avx512vl")]
24600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24601#[cfg_attr(test, assert_instr(vpermt2pd))]
24602pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
24603    unsafe {
24604        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
24605        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
24606    }
24607}
24608
24609/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24610///
24611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
24612#[inline]
24613#[target_feature(enable = "avx512f,avx512vl")]
24614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24615#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
24616pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
24617    unsafe {
24618        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
24619        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
24620    }
24621}
24622
24623/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
24624///
24625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
24626#[inline]
24627#[target_feature(enable = "avx512f,avx512vl")]
24628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24629#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
24630pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
24631    unsafe {
24632        let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
24633        let idx = _mm_castsi128_pd(idx).as_f64x2();
24634        transmute(simd_select_bitmask(k, permute, idx))
24635    }
24636}
24637
24638/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
24639///
24640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
24641#[inline]
24642#[target_feature(enable = "avx512f")]
24643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24644#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
24645#[rustc_legacy_const_generics(1)]
24646#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24647pub const fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
24648    unsafe {
24649        static_assert_uimm_bits!(MASK, 8);
24650        let r: i32x16 = simd_shuffle!(
24651            a.as_i32x16(),
24652            a.as_i32x16(),
24653            [
24654                MASK as u32 & 0b11,
24655                (MASK as u32 >> 2) & 0b11,
24656                (MASK as u32 >> 4) & 0b11,
24657                (MASK as u32 >> 6) & 0b11,
24658                (MASK as u32 & 0b11) + 4,
24659                ((MASK as u32 >> 2) & 0b11) + 4,
24660                ((MASK as u32 >> 4) & 0b11) + 4,
24661                ((MASK as u32 >> 6) & 0b11) + 4,
24662                (MASK as u32 & 0b11) + 8,
24663                ((MASK as u32 >> 2) & 0b11) + 8,
24664                ((MASK as u32 >> 4) & 0b11) + 8,
24665                ((MASK as u32 >> 6) & 0b11) + 8,
24666                (MASK as u32 & 0b11) + 12,
24667                ((MASK as u32 >> 2) & 0b11) + 12,
24668                ((MASK as u32 >> 4) & 0b11) + 12,
24669                ((MASK as u32 >> 6) & 0b11) + 12,
24670            ],
24671        );
24672        transmute(r)
24673    }
24674}
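
// Illustrative sketch (hypothetical): the same two-bit selectors are applied
// independently inside each 128-bit lane, so MASK = 0b00_01_10_11 reverses
// the four elements of every lane. Assumes AVX-512F; the name is made up.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn example_mm512_shuffle_epi32() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let r = _mm512_shuffle_epi32::<0b00_01_10_11>(a);
    let mut out = [0i32; 16];
    unsafe { _mm512_storeu_epi32(out.as_mut_ptr(), r) };
    assert_eq!(out, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]);
}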
24675
24676/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24677///
24678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
24679#[inline]
24680#[target_feature(enable = "avx512f")]
24681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24682#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24683#[rustc_legacy_const_generics(3)]
24684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24685pub const fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24686    src: __m512i,
24687    k: __mmask16,
24688    a: __m512i,
24689) -> __m512i {
24690    unsafe {
24691        static_assert_uimm_bits!(MASK, 8);
24692        let r = _mm512_shuffle_epi32::<MASK>(a);
24693        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24694    }
24695}
24696
24697/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24698///
24699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
24700#[inline]
24701#[target_feature(enable = "avx512f")]
24702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24703#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24704#[rustc_legacy_const_generics(2)]
24705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24706pub const fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24707    k: __mmask16,
24708    a: __m512i,
24709) -> __m512i {
24710    unsafe {
24711        static_assert_uimm_bits!(MASK, 8);
24712        let r = _mm512_shuffle_epi32::<MASK>(a);
24713        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24714    }
24715}
24716
24717/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24718///
24719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
24720#[inline]
24721#[target_feature(enable = "avx512f,avx512vl")]
24722#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24723#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24724#[rustc_legacy_const_generics(3)]
24725#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24726pub const fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24727    src: __m256i,
24728    k: __mmask8,
24729    a: __m256i,
24730) -> __m256i {
24731    unsafe {
24732        static_assert_uimm_bits!(MASK, 8);
24733        let r = _mm256_shuffle_epi32::<MASK>(a);
24734        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24735    }
24736}
24737
24738/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24739///
24740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
24741#[inline]
24742#[target_feature(enable = "avx512f,avx512vl")]
24743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24744#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24745#[rustc_legacy_const_generics(2)]
24746#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24747pub const fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24748    k: __mmask8,
24749    a: __m256i,
24750) -> __m256i {
24751    unsafe {
24752        static_assert_uimm_bits!(MASK, 8);
24753        let r = _mm256_shuffle_epi32::<MASK>(a);
24754        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24755    }
24756}
24757
24758/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24759///
24760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
24761#[inline]
24762#[target_feature(enable = "avx512f,avx512vl")]
24763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24764#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24765#[rustc_legacy_const_generics(3)]
24766#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24767pub const fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24768    src: __m128i,
24769    k: __mmask8,
24770    a: __m128i,
24771) -> __m128i {
24772    unsafe {
24773        static_assert_uimm_bits!(MASK, 8);
24774        let r = _mm_shuffle_epi32::<MASK>(a);
24775        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
24776    }
24777}
24778
24779/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24780///
24781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
24782#[inline]
24783#[target_feature(enable = "avx512f,avx512vl")]
24784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24785#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24786#[rustc_legacy_const_generics(2)]
24787#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24788pub const fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24789    k: __mmask8,
24790    a: __m128i,
24791) -> __m128i {
24792    unsafe {
24793        static_assert_uimm_bits!(MASK, 8);
24794        let r = _mm_shuffle_epi32::<MASK>(a);
24795        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
24796    }
24797}
24798
24799/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
24800///
24801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
24802#[inline]
24803#[target_feature(enable = "avx512f")]
24804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24805#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24806#[rustc_legacy_const_generics(2)]
24807#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24808pub const fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24809    unsafe {
24810        static_assert_uimm_bits!(MASK, 8);
24811        simd_shuffle!(
24812            a,
24813            b,
24814            [
24815                MASK as u32 & 0b11,
24816                (MASK as u32 >> 2) & 0b11,
24817                ((MASK as u32 >> 4) & 0b11) + 16,
24818                ((MASK as u32 >> 6) & 0b11) + 16,
24819                (MASK as u32 & 0b11) + 4,
24820                ((MASK as u32 >> 2) & 0b11) + 4,
24821                ((MASK as u32 >> 4) & 0b11) + 20,
24822                ((MASK as u32 >> 6) & 0b11) + 20,
24823                (MASK as u32 & 0b11) + 8,
24824                ((MASK as u32 >> 2) & 0b11) + 8,
24825                ((MASK as u32 >> 4) & 0b11) + 24,
24826                ((MASK as u32 >> 6) & 0b11) + 24,
24827                (MASK as u32 & 0b11) + 12,
24828                ((MASK as u32 >> 2) & 0b11) + 12,
24829                ((MASK as u32 >> 4) & 0b11) + 28,
24830                ((MASK as u32 >> 6) & 0b11) + 28,
24831            ],
24832        )
24833    }
24834}
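
// Illustrative sketch (hypothetical): per 128-bit lane, the two low results
// come from `a` and the two high results from `b`, each chosen by one 2-bit
// field of the immediate. MASK = 0b01_00_11_10 yields [a2, a3, b0, b1] in
// every lane. Assumes AVX-512F; the name is made up.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn example_mm512_shuffle_ps() {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    let b = _mm512_setr_ps(
        100., 101., 102., 103., 104., 105., 106., 107., 108., 109., 110., 111., 112., 113.,
        114., 115.,
    );
    let r = _mm512_shuffle_ps::<0b01_00_11_10>(a, b);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    let e = [
        2., 3., 100., 101., 6., 7., 104., 105., 10., 11., 108., 109., 14., 15., 112., 113.,
    ];
    assert_eq!(out, e);
}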
24835
24836/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24837///
24838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
24839#[inline]
24840#[target_feature(enable = "avx512f")]
24841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24842#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24843#[rustc_legacy_const_generics(4)]
24844#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24845pub const fn _mm512_mask_shuffle_ps<const MASK: i32>(
24846    src: __m512,
24847    k: __mmask16,
24848    a: __m512,
24849    b: __m512,
24850) -> __m512 {
24851    unsafe {
24852        static_assert_uimm_bits!(MASK, 8);
24853        let r = _mm512_shuffle_ps::<MASK>(a, b);
24854        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24855    }
24856}
24857
24858/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24859///
24860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24861#[inline]
24862#[target_feature(enable = "avx512f")]
24863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24864#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24865#[rustc_legacy_const_generics(3)]
24866#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24867pub const fn _mm512_maskz_shuffle_ps<const MASK: i32>(
24868    k: __mmask16,
24869    a: __m512,
24870    b: __m512,
24871) -> __m512 {
24872    unsafe {
24873        static_assert_uimm_bits!(MASK, 8);
24874        let r = _mm512_shuffle_ps::<MASK>(a, b);
24875        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24876    }
24877}
24878
24879/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24880///
24881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24882#[inline]
24883#[target_feature(enable = "avx512f,avx512vl")]
24884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24885#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24886#[rustc_legacy_const_generics(4)]
24887#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24888pub const fn _mm256_mask_shuffle_ps<const MASK: i32>(
24889    src: __m256,
24890    k: __mmask8,
24891    a: __m256,
24892    b: __m256,
24893) -> __m256 {
24894    unsafe {
24895        static_assert_uimm_bits!(MASK, 8);
24896        let r = _mm256_shuffle_ps::<MASK>(a, b);
24897        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24898    }
24899}
24900
24901/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24902///
24903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24904#[inline]
24905#[target_feature(enable = "avx512f,avx512vl")]
24906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24907#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24908#[rustc_legacy_const_generics(3)]
24909#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24910pub const fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24911    unsafe {
24912        static_assert_uimm_bits!(MASK, 8);
24913        let r = _mm256_shuffle_ps::<MASK>(a, b);
24914        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24915    }
24916}
24917
24918/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24919///
24920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24921#[inline]
24922#[target_feature(enable = "avx512f,avx512vl")]
24923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24924#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24925#[rustc_legacy_const_generics(4)]
24926#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24927pub const fn _mm_mask_shuffle_ps<const MASK: i32>(
24928    src: __m128,
24929    k: __mmask8,
24930    a: __m128,
24931    b: __m128,
24932) -> __m128 {
24933    unsafe {
24934        static_assert_uimm_bits!(MASK, 8);
24935        let r = _mm_shuffle_ps::<MASK>(a, b);
24936        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24937    }
24938}
24939
24940/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24941///
24942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24943#[inline]
24944#[target_feature(enable = "avx512f,avx512vl")]
24945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24946#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24947#[rustc_legacy_const_generics(3)]
24948#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24949pub const fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24950    unsafe {
24951        static_assert_uimm_bits!(MASK, 8);
24952        let r = _mm_shuffle_ps::<MASK>(a, b);
24953        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24954    }
24955}
24956
24957/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24958///
24959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
24960#[inline]
24961#[target_feature(enable = "avx512f")]
24962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24963#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24964#[rustc_legacy_const_generics(2)]
24965#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24966pub const fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24967    unsafe {
24968        static_assert_uimm_bits!(MASK, 8);
24969        simd_shuffle!(
24970            a,
24971            b,
24972            [
24973                MASK as u32 & 0b1,
24974                ((MASK as u32 >> 1) & 0b1) + 8,
24975                ((MASK as u32 >> 2) & 0b1) + 2,
24976                ((MASK as u32 >> 3) & 0b1) + 10,
24977                ((MASK as u32 >> 4) & 0b1) + 4,
24978                ((MASK as u32 >> 5) & 0b1) + 12,
24979                ((MASK as u32 >> 6) & 0b1) + 6,
24980                ((MASK as u32 >> 7) & 0b1) + 14,
24981            ],
24982        )
24983    }
24984}
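
// Illustrative sketch (hypothetical): within each 128-bit lane the result is
// one element of `a` followed by one element of `b`; immediate bit 2*i picks
// the `a` element and bit 2*i+1 the `b` element for lane i. Assumes
// AVX-512F; the name is made up.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn example_mm512_shuffle_pd() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
    let r = _mm512_shuffle_pd::<0b0101_0101>(a, b);
    let mut out = [0.0f64; 8];
    unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [1., 10., 3., 12., 5., 14., 7., 16.]);
}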
24985
24986/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24987///
24988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24989#[inline]
24990#[target_feature(enable = "avx512f")]
24991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24992#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24993#[rustc_legacy_const_generics(4)]
24994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
24995pub const fn _mm512_mask_shuffle_pd<const MASK: i32>(
24996    src: __m512d,
24997    k: __mmask8,
24998    a: __m512d,
24999    b: __m512d,
25000) -> __m512d {
25001    unsafe {
25002        static_assert_uimm_bits!(MASK, 8);
25003        let r = _mm512_shuffle_pd::<MASK>(a, b);
25004        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25005    }
25006}
25007
25008/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25009///
25010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
25011#[inline]
25012#[target_feature(enable = "avx512f")]
25013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25014#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25015#[rustc_legacy_const_generics(3)]
25016#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25017pub const fn _mm512_maskz_shuffle_pd<const MASK: i32>(
25018    k: __mmask8,
25019    a: __m512d,
25020    b: __m512d,
25021) -> __m512d {
25022    unsafe {
25023        static_assert_uimm_bits!(MASK, 8);
25024        let r = _mm512_shuffle_pd::<MASK>(a, b);
25025        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25026    }
25027}
25028
25029/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25030///
25031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
25032#[inline]
25033#[target_feature(enable = "avx512f,avx512vl")]
25034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25035#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25036#[rustc_legacy_const_generics(4)]
25037#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25038pub const fn _mm256_mask_shuffle_pd<const MASK: i32>(
25039    src: __m256d,
25040    k: __mmask8,
25041    a: __m256d,
25042    b: __m256d,
25043) -> __m256d {
25044    unsafe {
25045        static_assert_uimm_bits!(MASK, 8);
25046        let r = _mm256_shuffle_pd::<MASK>(a, b);
25047        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25048    }
25049}
25050
25051/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25052///
25053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
25054#[inline]
25055#[target_feature(enable = "avx512f,avx512vl")]
25056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25057#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
25058#[rustc_legacy_const_generics(3)]
25059#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25060pub const fn _mm256_maskz_shuffle_pd<const MASK: i32>(
25061    k: __mmask8,
25062    a: __m256d,
25063    b: __m256d,
25064) -> __m256d {
25065    unsafe {
25066        static_assert_uimm_bits!(MASK, 8);
25067        let r = _mm256_shuffle_pd::<MASK>(a, b);
25068        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25069    }
25070}
25071
25072/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25073///
25074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
25075#[inline]
25076#[target_feature(enable = "avx512f,avx512vl")]
25077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25078#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
25079#[rustc_legacy_const_generics(4)]
25080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25081pub const fn _mm_mask_shuffle_pd<const MASK: i32>(
25082    src: __m128d,
25083    k: __mmask8,
25084    a: __m128d,
25085    b: __m128d,
25086) -> __m128d {
25087    unsafe {
25088        static_assert_uimm_bits!(MASK, 8);
25089        let r = _mm_shuffle_pd::<MASK>(a, b);
25090        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
25091    }
25092}
25093
25094/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25095///
25096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
25097#[inline]
25098#[target_feature(enable = "avx512f,avx512vl")]
25099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25100#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
25101#[rustc_legacy_const_generics(3)]
25102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25103pub const fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
25104    unsafe {
25105        static_assert_uimm_bits!(MASK, 8);
25106        let r = _mm_shuffle_pd::<MASK>(a, b);
25107        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
25108    }
25109}
25110
25111/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
25112///
25113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
25114#[inline]
25115#[target_feature(enable = "avx512f")]
25116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25117#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
25118#[rustc_legacy_const_generics(2)]
25119#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25120pub const fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
25121    unsafe {
25122        static_assert_uimm_bits!(MASK, 8);
25123        let a = a.as_i32x16();
25124        let b = b.as_i32x16();
25125        let r: i32x16 = simd_shuffle!(
25126            a,
25127            b,
25128            [
25129                (MASK as u32 & 0b11) * 4 + 0,
25130                (MASK as u32 & 0b11) * 4 + 1,
25131                (MASK as u32 & 0b11) * 4 + 2,
25132                (MASK as u32 & 0b11) * 4 + 3,
25133                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
25134                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
25135                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
25136                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
25137                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
25138                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
25139                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
25140                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
25141                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
25142                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
25143                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
25144                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
25145            ],
25146        );
25147        transmute(r)
25148    }
25149}
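
// Illustrative sketch (hypothetical): the immediate selects whole 128-bit
// lanes, two from `a` (low half of the result) and two from `b` (high
// half); MASK = 0b00_01_10_11 yields [a.lane3, a.lane2, b.lane1, b.lane0].
// Assumes AVX-512F; the name is made up.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn example_mm512_shuffle_i32x4() {
    let a = _mm512_setr_epi32(0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3);
    let b = _mm512_setr_epi32(10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13);
    let r = _mm512_shuffle_i32x4::<0b00_01_10_11>(a, b);
    let mut out = [0i32; 16];
    unsafe { _mm512_storeu_epi32(out.as_mut_ptr(), r) };
    assert_eq!(out, [3, 3, 3, 3, 2, 2, 2, 2, 11, 11, 11, 11, 10, 10, 10, 10]);
}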
25150
25151/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25152///
25153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
25154#[inline]
25155#[target_feature(enable = "avx512f")]
25156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25157#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
25158#[rustc_legacy_const_generics(4)]
25159#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25160pub const fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
25161    src: __m512i,
25162    k: __mmask16,
25163    a: __m512i,
25164    b: __m512i,
25165) -> __m512i {
25166    unsafe {
25167        static_assert_uimm_bits!(MASK, 8);
25168        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
25169        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25170    }
25171}
25172
25173/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25174///
25175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
25176#[inline]
25177#[target_feature(enable = "avx512f")]
25178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25179#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
25180#[rustc_legacy_const_generics(3)]
25181#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25182pub const fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
25183    k: __mmask16,
25184    a: __m512i,
25185    b: __m512i,
25186) -> __m512i {
25187    unsafe {
25188        static_assert_uimm_bits!(MASK, 8);
25189        let r = _mm512_shuffle_i32x4::<MASK>(a, b);
25190        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25191    }
25192}
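
// Hedged sketch of the writemask/zeromask pattern shared by the `mask_`/`maskz_`
// variants above (the helper is ours, not a stdarch API): bit `i` of `k` picks
// the shuffled element, otherwise the fallback, which is `src[i]` for the
// writemask form and zero for the zeromask form.
#[cfg(test)]
#[allow(dead_code)]
fn _select_bitmask_sketch(k: u16, r: [i32; 16], fallback: [i32; 16]) -> [i32; 16] {
    let mut out = [0i32; 16];
    for i in 0..16 {
        out[i] = if (k >> i) & 1 == 1 { r[i] } else { fallback[i] };
    }
    out
}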
25193
25194/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
25195///
25196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
25197#[inline]
25198#[target_feature(enable = "avx512f,avx512vl")]
25199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25200#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] // should be vshufi32x4
25201#[rustc_legacy_const_generics(2)]
25202#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25203pub const fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
25204    unsafe {
25205        static_assert_uimm_bits!(MASK, 8);
25206        let a = a.as_i32x8();
25207        let b = b.as_i32x8();
25208        let r: i32x8 = simd_shuffle!(
25209            a,
25210            b,
25211            [
25212                (MASK as u32 & 0b1) * 4 + 0,
25213                (MASK as u32 & 0b1) * 4 + 1,
25214                (MASK as u32 & 0b1) * 4 + 2,
25215                (MASK as u32 & 0b1) * 4 + 3,
25216                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
25217                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
25218                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
25219                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
25220            ],
25221        );
25222        transmute(r)
25223    }
25224}
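
// Illustrative note (helper name is ours): with only two 128-bit lanes, the
// 256-bit form needs just one selector bit per destination lane; the low lane
// picks a half of `a`, the high lane a half of `b` (offset 8 elements).
#[cfg(test)]
#[allow(dead_code)]
fn _shuffle_i32x4_256_indices_sketch(mask: u8) -> [u32; 8] {
    let lo = (mask as u32 & 0b1) * 4; // which half of `a`
    let hi = ((mask as u32 >> 1) & 0b1) * 4 + 8; // which half of `b`
    [lo, lo + 1, lo + 2, lo + 3, hi, hi + 1, hi + 2, hi + 3]
}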
25225
25226/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25227///
25228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
25229#[inline]
25230#[target_feature(enable = "avx512f,avx512vl")]
25231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25232#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
25233#[rustc_legacy_const_generics(4)]
25234#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25235pub const fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
25236    src: __m256i,
25237    k: __mmask8,
25238    a: __m256i,
25239    b: __m256i,
25240) -> __m256i {
25241    unsafe {
25242        static_assert_uimm_bits!(MASK, 8);
25243        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
25244        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25245    }
25246}
25247
25248/// Shuffle 128 bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25249///
25250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
25251#[inline]
25252#[target_feature(enable = "avx512f,avx512vl")]
25253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25254#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
25255#[rustc_legacy_const_generics(3)]
25256#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25257pub const fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(
25258    k: __mmask8,
25259    a: __m256i,
25260    b: __m256i,
25261) -> __m256i {
25262    unsafe {
25263        static_assert_uimm_bits!(MASK, 8);
25264        let r = _mm256_shuffle_i32x4::<MASK>(a, b);
25265        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25266    }
25267}
25268
25269/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
25270///
25271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
25272#[inline]
25273#[target_feature(enable = "avx512f")]
25274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25275#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25276#[rustc_legacy_const_generics(2)]
25277#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25278pub const fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
25279    unsafe {
25280        static_assert_uimm_bits!(MASK, 8);
25281        let a = a.as_i64x8();
25282        let b = b.as_i64x8();
25283        let r: i64x8 = simd_shuffle!(
25284            a,
25285            b,
25286            [
25287                (MASK as u32 & 0b11) * 2 + 0,
25288                (MASK as u32 & 0b11) * 2 + 1,
25289                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
25290                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
25291                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
25292                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
25293                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
25294                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
25295            ],
25296        );
25297        transmute(r)
25298    }
25299}
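
// A worked decode, under our own choice of immediate, of the value used in the
// `assert_instr` annotation above: each 128-bit destination lane carries two
// i64 elements and takes a 2-bit selector from the immediate.
#[cfg(test)]
#[allow(dead_code)]
fn _shuffle_i64x2_decode_sketch() {
    let mask: u32 = 0b10_11_11_11;
    let sels = [mask & 0b11, (mask >> 2) & 0b11, (mask >> 4) & 0b11, (mask >> 6) & 0b11];
    // Lanes 0-1 select within `a`, lanes 2-3 within `b`, so dst is
    // [a[6], a[7], a[6], a[7], b[6], b[7], b[4], b[5]].
    assert_eq!(sels, [3, 3, 3, 2]);
}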
25300
25301/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25302///
25303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
25304#[inline]
25305#[target_feature(enable = "avx512f")]
25306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25307#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25308#[rustc_legacy_const_generics(4)]
25309#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25310pub const fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
25311    src: __m512i,
25312    k: __mmask8,
25313    a: __m512i,
25314    b: __m512i,
25315) -> __m512i {
25316    unsafe {
25317        static_assert_uimm_bits!(MASK, 8);
25318        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
25319        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25320    }
25321}
25322
25323/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25324///
25325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
25326#[inline]
25327#[target_feature(enable = "avx512f")]
25328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25329#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
25330#[rustc_legacy_const_generics(3)]
25331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25332pub const fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(
25333    k: __mmask8,
25334    a: __m512i,
25335    b: __m512i,
25336) -> __m512i {
25337    unsafe {
25338        static_assert_uimm_bits!(MASK, 8);
25339        let r = _mm512_shuffle_i64x2::<MASK>(a, b);
25340        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25341    }
25342}
25343
25344/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
25345///
25346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
25347#[inline]
25348#[target_feature(enable = "avx512f,avx512vl")]
25349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25350#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // should be vshufi64x2
25351#[rustc_legacy_const_generics(2)]
25352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25353pub const fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
25354    unsafe {
25355        static_assert_uimm_bits!(MASK, 8);
25356        let a = a.as_i64x4();
25357        let b = b.as_i64x4();
25358        let r: i64x4 = simd_shuffle!(
25359            a,
25360            b,
25361            [
25362                (MASK as u32 & 0b1) * 2 + 0,
25363                (MASK as u32 & 0b1) * 2 + 1,
25364                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
25365                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
25366            ],
25367        );
25368        transmute(r)
25369    }
25370}
25371
25372/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25373///
25374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
25375#[inline]
25376#[target_feature(enable = "avx512f,avx512vl")]
25377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25378#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
25379#[rustc_legacy_const_generics(4)]
25380#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25381pub const fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
25382    src: __m256i,
25383    k: __mmask8,
25384    a: __m256i,
25385    b: __m256i,
25386) -> __m256i {
25387    unsafe {
25388        static_assert_uimm_bits!(MASK, 8);
25389        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
25390        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
25391    }
25392}
25393
25394/// Shuffle 128 bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25395///
25396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
25397#[inline]
25398#[target_feature(enable = "avx512f,avx512vl")]
25399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25400#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
25401#[rustc_legacy_const_generics(3)]
25402#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25403pub const fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(
25404    k: __mmask8,
25405    a: __m256i,
25406    b: __m256i,
25407) -> __m256i {
25408    unsafe {
25409        static_assert_uimm_bits!(MASK, 8);
25410        let r = _mm256_shuffle_i64x2::<MASK>(a, b);
25411        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
25412    }
25413}
25414
25415/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25416///
25417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
25418#[inline]
25419#[target_feature(enable = "avx512f")]
25420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25421#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] // should be vshuff32x4, but generates vshuff64x2
25422#[rustc_legacy_const_generics(2)]
25423#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25424pub const fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
25425    unsafe {
25426        static_assert_uimm_bits!(MASK, 8);
25427        let a = a.as_f32x16();
25428        let b = b.as_f32x16();
25429        let r: f32x16 = simd_shuffle!(
25430            a,
25431            b,
25432            [
25433                (MASK as u32 & 0b11) * 4 + 0,
25434                (MASK as u32 & 0b11) * 4 + 1,
25435                (MASK as u32 & 0b11) * 4 + 2,
25436                (MASK as u32 & 0b11) * 4 + 3,
25437                ((MASK as u32 >> 2) & 0b11) * 4 + 0,
25438                ((MASK as u32 >> 2) & 0b11) * 4 + 1,
25439                ((MASK as u32 >> 2) & 0b11) * 4 + 2,
25440                ((MASK as u32 >> 2) & 0b11) * 4 + 3,
25441                ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
25442                ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
25443                ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
25444                ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
25445                ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
25446                ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
25447                ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
25448                ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
25449            ],
25450        );
25451        transmute(r)
25452    }
25453}
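
// A hedged scalar model (our construction, not the intrinsic itself) of the
// shuffle above: treat `a` and `b` as four 4-element lanes each and gather
// whole lanes according to the mask.
#[cfg(test)]
#[allow(dead_code)]
fn _shuffle_f32x4_model(a: [f32; 16], b: [f32; 16], mask: u8) -> [f32; 16] {
    let mut out = [0.0f32; 16];
    for lane in 0..4 {
        let sel = ((mask as usize) >> (2 * lane)) & 0b11;
        let src = if lane < 2 { &a } else { &b }; // low lanes gather from a, high from b
        for j in 0..4 {
            out[lane * 4 + j] = src[sel * 4 + j];
        }
    }
    out
}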
25454
25455/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25456///
25457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
25458#[inline]
25459#[target_feature(enable = "avx512f")]
25460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25461#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
25462#[rustc_legacy_const_generics(4)]
25463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25464pub const fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
25465    src: __m512,
25466    k: __mmask16,
25467    a: __m512,
25468    b: __m512,
25469) -> __m512 {
25470    unsafe {
25471        static_assert_uimm_bits!(MASK, 8);
25472        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
25473        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25474    }
25475}
25476
25477/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25478///
25479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
25480#[inline]
25481#[target_feature(enable = "avx512f")]
25482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25483#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
25484#[rustc_legacy_const_generics(3)]
25485#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25486pub const fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(
25487    k: __mmask16,
25488    a: __m512,
25489    b: __m512,
25490) -> __m512 {
25491    unsafe {
25492        static_assert_uimm_bits!(MASK, 8);
25493        let r = _mm512_shuffle_f32x4::<MASK>(a, b);
25494        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25495    }
25496}
25497
25498/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25499///
25500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
25501#[inline]
25502#[target_feature(enable = "avx512f,avx512vl")]
25503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25504#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // should be vshuff32x4
25505#[rustc_legacy_const_generics(2)]
25506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25507pub const fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
25508    unsafe {
25509        static_assert_uimm_bits!(MASK, 8);
25510        let a = a.as_f32x8();
25511        let b = b.as_f32x8();
25512        let r: f32x8 = simd_shuffle!(
25513            a,
25514            b,
25515            [
25516                (MASK as u32 & 0b1) * 4 + 0,
25517                (MASK as u32 & 0b1) * 4 + 1,
25518                (MASK as u32 & 0b1) * 4 + 2,
25519                (MASK as u32 & 0b1) * 4 + 3,
25520                ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
25521                ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
25522                ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
25523                ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
25524            ],
25525        );
25526        transmute(r)
25527    }
25528}
25529
25530/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25531///
25532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
25533#[inline]
25534#[target_feature(enable = "avx512f,avx512vl")]
25535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25536#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
25537#[rustc_legacy_const_generics(4)]
25538#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25539pub const fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
25540    src: __m256,
25541    k: __mmask8,
25542    a: __m256,
25543    b: __m256,
25544) -> __m256 {
25545    unsafe {
25546        static_assert_uimm_bits!(MASK, 8);
25547        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
25548        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25549    }
25550}
25551
25552/// Shuffle 128 bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25553///
25554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
25555#[inline]
25556#[target_feature(enable = "avx512f,avx512vl")]
25557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25558#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
25559#[rustc_legacy_const_generics(3)]
25560#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25561pub const fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(
25562    k: __mmask8,
25563    a: __m256,
25564    b: __m256,
25565) -> __m256 {
25566    unsafe {
25567        static_assert_uimm_bits!(MASK, 8);
25568        let r = _mm256_shuffle_f32x4::<MASK>(a, b);
25569        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25570    }
25571}
25572
25573/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25574///
25575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
25576#[inline]
25577#[target_feature(enable = "avx512f")]
25578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25579#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
25580#[rustc_legacy_const_generics(2)]
25581#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25582pub const fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
25583    unsafe {
25584        static_assert_uimm_bits!(MASK, 8);
25585        let a = a.as_f64x8();
25586        let b = b.as_f64x8();
25587        let r: f64x8 = simd_shuffle!(
25588            a,
25589            b,
25590            [
25591                (MASK as u32 & 0b11) * 2 + 0,
25592                (MASK as u32 & 0b11) * 2 + 1,
25593                ((MASK as u32 >> 2) & 0b11) * 2 + 0,
25594                ((MASK as u32 >> 2) & 0b11) * 2 + 1,
25595                ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
25596                ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
25597                ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
25598                ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
25599            ],
25600        );
25601        transmute(r)
25602    }
25603}
25604
25605/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25606///
25607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
25608#[inline]
25609#[target_feature(enable = "avx512f")]
25610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25611#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
25612#[rustc_legacy_const_generics(4)]
25613#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25614pub const fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
25615    src: __m512d,
25616    k: __mmask8,
25617    a: __m512d,
25618    b: __m512d,
25619) -> __m512d {
25620    unsafe {
25621        static_assert_uimm_bits!(MASK, 8);
25622        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
25623        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25624    }
25625}
25626
25627/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25628///
25629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
25630#[inline]
25631#[target_feature(enable = "avx512f")]
25632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25633#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
25634#[rustc_legacy_const_generics(3)]
25635#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25636pub const fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(
25637    k: __mmask8,
25638    a: __m512d,
25639    b: __m512d,
25640) -> __m512d {
25641    unsafe {
25642        static_assert_uimm_bits!(MASK, 8);
25643        let r = _mm512_shuffle_f64x2::<MASK>(a, b);
25644        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25645    }
25646}
25647
25648/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
25649///
25650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
25651#[inline]
25652#[target_feature(enable = "avx512f,avx512vl")]
25653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25654#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] // should be vshuff64x2
25655#[rustc_legacy_const_generics(2)]
25656#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25657pub const fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
25658    unsafe {
25659        static_assert_uimm_bits!(MASK, 8);
25660        let a = a.as_f64x4();
25661        let b = b.as_f64x4();
25662        let r: f64x4 = simd_shuffle!(
25663            a,
25664            b,
25665            [
25666                (MASK as u32 & 0b1) * 2 + 0,
25667                (MASK as u32 & 0b1) * 2 + 1,
25668                ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
25669                ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
25670            ],
25671        );
25672        transmute(r)
25673    }
25674}
25675
25676/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25677///
25678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
25679#[inline]
25680#[target_feature(enable = "avx512f,avx512vl")]
25681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25682#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
25683#[rustc_legacy_const_generics(4)]
25684#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25685pub const fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
25686    src: __m256d,
25687    k: __mmask8,
25688    a: __m256d,
25689    b: __m256d,
25690) -> __m256d {
25691    unsafe {
25692        static_assert_uimm_bits!(MASK, 8);
25693        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
25694        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25695    }
25696}
25697
25698/// Shuffle 128 bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25699///
25700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
25701#[inline]
25702#[target_feature(enable = "avx512f,avx512vl")]
25703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25704#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
25705#[rustc_legacy_const_generics(3)]
25706#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25707pub const fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(
25708    k: __mmask8,
25709    a: __m256d,
25710    b: __m256d,
25711) -> __m256d {
25712    unsafe {
25713        static_assert_uimm_bits!(MASK, 8);
25714        let r = _mm256_shuffle_f64x2::<MASK>(a, b);
25715        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25716    }
25717}
25718
25719/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25720///
25721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
25722#[inline]
25723#[target_feature(enable = "avx512f")]
25724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25725#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
25726#[rustc_legacy_const_generics(1)]
25727#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25728pub const fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
25729    unsafe {
25730        static_assert_uimm_bits!(IMM8, 2);
25731        match IMM8 & 0x3 {
25732            0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
25733            1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
25734            2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
25735            _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
25736        }
25737    }
25738}
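
// Scalar sketch (ours): IMM8 names one of the four 128-bit quarters of `a`,
// i.e. elements `4 * IMM8 .. 4 * IMM8 + 4`.
#[cfg(test)]
#[allow(dead_code)]
fn _extractf32x4_model(a: [f32; 16], imm8: usize) -> [f32; 4] {
    let base = (imm8 & 0b11) * 4;
    [a[base], a[base + 1], a[base + 2], a[base + 3]]
}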
25739
25740/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25741///
25742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
25743#[inline]
25744#[target_feature(enable = "avx512f")]
25745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25746#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
25747#[rustc_legacy_const_generics(3)]
25748#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25749pub const fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(
25750    src: __m128,
25751    k: __mmask8,
25752    a: __m512,
25753) -> __m128 {
25754    unsafe {
25755        static_assert_uimm_bits!(IMM8, 2);
25756        let r = _mm512_extractf32x4_ps::<IMM8>(a);
25757        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
25758    }
25759}
25760
25761/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25762///
25763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
25764#[inline]
25765#[target_feature(enable = "avx512f")]
25766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25767#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
25768#[rustc_legacy_const_generics(2)]
25769#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25770pub const fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
25771    unsafe {
25772        static_assert_uimm_bits!(IMM8, 2);
25773        let r = _mm512_extractf32x4_ps::<IMM8>(a);
25774        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
25775    }
25776}
25777
25778/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25779///
25780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
25781#[inline]
25782#[target_feature(enable = "avx512f,avx512vl")]
25783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25784#[cfg_attr(
25785    test,
25786    assert_instr(vextract, IMM8 = 1) // should be vextractf32x4
25787)]
25788#[rustc_legacy_const_generics(1)]
25789#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25790pub const fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
25791    unsafe {
25792        static_assert_uimm_bits!(IMM8, 1);
25793        match IMM8 & 0x1 {
25794            0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
25795            _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
25796        }
25797    }
25798}
25799
25800/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25801///
25802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
25803#[inline]
25804#[target_feature(enable = "avx512f,avx512vl")]
25805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25806#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
25807#[rustc_legacy_const_generics(3)]
25808#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25809pub const fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(
25810    src: __m128,
25811    k: __mmask8,
25812    a: __m256,
25813) -> __m128 {
25814    unsafe {
25815        static_assert_uimm_bits!(IMM8, 1);
25816        let r = _mm256_extractf32x4_ps::<IMM8>(a);
25817        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
25818    }
25819}
25820
25821/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25822///
25823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
25824#[inline]
25825#[target_feature(enable = "avx512f,avx512vl")]
25826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25827#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
25828#[rustc_legacy_const_generics(2)]
25829#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25830pub const fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
25831    unsafe {
25832        static_assert_uimm_bits!(IMM8, 1);
25833        let r = _mm256_extractf32x4_ps::<IMM8>(a);
25834        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
25835    }
25836}
25837
25838/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
25839///
25840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
25841#[inline]
25842#[target_feature(enable = "avx512f")]
25843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25844#[cfg_attr(
25845    test,
25846    assert_instr(vextractf64x4, IMM1 = 1) // should be vextracti64x4
25847)]
25848#[rustc_legacy_const_generics(1)]
25849#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25850pub const fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
25851    unsafe {
25852        static_assert_uimm_bits!(IMM1, 1);
25853        match IMM1 {
25854            0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
25855            _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
25856        }
25857    }
25858}
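
// Scalar sketch (ours): IMM1 selects the low (0) or high (1) 256-bit half of
// `a`, i.e. four consecutive 64-bit elements.
#[cfg(test)]
#[allow(dead_code)]
fn _extracti64x4_model(a: [i64; 8], imm1: usize) -> [i64; 4] {
    let base = (imm1 & 0b1) * 4;
    [a[base], a[base + 1], a[base + 2], a[base + 3]]
}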
25859
25860/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25861///
25862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
25863#[inline]
25864#[target_feature(enable = "avx512f")]
25865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25866#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25867#[rustc_legacy_const_generics(3)]
25868#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25869pub const fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
25870    src: __m256i,
25871    k: __mmask8,
25872    a: __m512i,
25873) -> __m256i {
25874    unsafe {
25875        static_assert_uimm_bits!(IMM1, 1);
25876        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
25877        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
25878    }
25879}
25880
25881/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25882///
25883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
25884#[inline]
25885#[target_feature(enable = "avx512f")]
25886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25887#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25888#[rustc_legacy_const_generics(2)]
25889#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25890pub const fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
25891    unsafe {
25892        static_assert_uimm_bits!(IMM1, 1);
25893        let r = _mm512_extracti64x4_epi64::<IMM1>(a);
25894        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
25895    }
25896}
25897
25898/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25899///
25900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
25901#[inline]
25902#[target_feature(enable = "avx512f")]
25903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25904#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25905#[rustc_legacy_const_generics(1)]
25906#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25907pub const fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
25908    unsafe {
25909        static_assert_uimm_bits!(IMM8, 1);
25910        match IMM8 & 0x1 {
25911            0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
25912            _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
25913        }
25914    }
25915}
25916
25917/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25918///
25919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25920#[inline]
25921#[target_feature(enable = "avx512f")]
25922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25923#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25924#[rustc_legacy_const_generics(3)]
25925#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25926pub const fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25927    src: __m256d,
25928    k: __mmask8,
25929    a: __m512d,
25930) -> __m256d {
25931    unsafe {
25932        static_assert_uimm_bits!(IMM8, 1);
25933        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25934        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25935    }
25936}
25937
25938/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25939///
25940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25941#[inline]
25942#[target_feature(enable = "avx512f")]
25943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25944#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25945#[rustc_legacy_const_generics(2)]
25946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25947pub const fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25948    unsafe {
25949        static_assert_uimm_bits!(IMM8, 1);
25950        let r = _mm512_extractf64x4_pd::<IMM8>(a);
25951        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25952    }
25953}
25954
25955/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25956///
25957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
25958#[inline]
25959#[target_feature(enable = "avx512f")]
25960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25961#[cfg_attr(
25962    test,
25963    assert_instr(vextractf32x4, IMM2 = 3) // should be vextracti32x4
25964)]
25965#[rustc_legacy_const_generics(1)]
25966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25967pub const fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25968    unsafe {
25969        static_assert_uimm_bits!(IMM2, 2);
25970        let a = a.as_i32x16();
25971        let zero = i32x16::ZERO;
25972        let extract: i32x4 = match IMM2 {
25973            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25974            1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25975            2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25976            _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25977        };
25978        transmute(extract)
25979    }
25980}
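
// Scalar sketch (ours): IMM2 picks one of four 128-bit quarters. Note the zero
// vector handed to `simd_shuffle!` above is never read; it only satisfies the
// macro's two-operand shape, since all indices stay below 16.
#[cfg(test)]
#[allow(dead_code)]
fn _extracti32x4_model(a: [i32; 16], imm2: usize) -> [i32; 4] {
    let base = (imm2 & 0b11) * 4;
    [a[base], a[base + 1], a[base + 2], a[base + 3]]
}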
25981
25982/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25983///
25984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25985#[inline]
25986#[target_feature(enable = "avx512f")]
25987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25988#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25989#[rustc_legacy_const_generics(3)]
25990#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
25991pub const fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25992    src: __m128i,
25993    k: __mmask8,
25994    a: __m512i,
25995) -> __m128i {
25996    unsafe {
25997        static_assert_uimm_bits!(IMM2, 2);
25998        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
25999        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
26000    }
26001}
26002
26003/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26004///
26005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
26006#[inline]
26007#[target_feature(enable = "avx512f")]
26008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26009#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
26010#[rustc_legacy_const_generics(2)]
26011#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26012pub const fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
26013    unsafe {
26014        static_assert_uimm_bits!(IMM2, 2);
26015        let r = _mm512_extracti32x4_epi32::<IMM2>(a);
26016        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
26017    }
26018}
26019
26020/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
26021///
26022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
26023#[inline]
26024#[target_feature(enable = "avx512f,avx512vl")]
26025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26026#[cfg_attr(
26027    test,
26028    assert_instr(vextract, IMM1 = 1) // should be vextracti32x4
26029)]
26030#[rustc_legacy_const_generics(1)]
26031#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26032pub const fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
26033    unsafe {
26034        static_assert_uimm_bits!(IMM1, 1);
26035        let a = a.as_i32x8();
26036        let zero = i32x8::ZERO;
26037        let extract: i32x4 = match IMM1 {
26038            0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
26039            _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
26040        };
26041        transmute(extract)
26042    }
26043}
26044
26045/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26046///
26047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
26048#[inline]
26049#[target_feature(enable = "avx512f,avx512vl")]
26050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26051#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
26052#[rustc_legacy_const_generics(3)]
26053#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26054pub const fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
26055    src: __m128i,
26056    k: __mmask8,
26057    a: __m256i,
26058) -> __m128i {
26059    unsafe {
26060        static_assert_uimm_bits!(IMM1, 1);
26061        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
26062        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
26063    }
26064}
26065
26066/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26067///
26068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
26069#[inline]
26070#[target_feature(enable = "avx512f,avx512vl")]
26071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26072#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
26073#[rustc_legacy_const_generics(2)]
26074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26075pub const fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
26076    unsafe {
26077        static_assert_uimm_bits!(IMM1, 1);
26078        let r = _mm256_extracti32x4_epi32::<IMM1>(a);
26079        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
26080    }
26081}
26082
26083/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
26084///
26085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
26086#[inline]
26087#[target_feature(enable = "avx512f")]
26088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26089#[cfg_attr(test, assert_instr(vmovsldup))]
26090#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26091pub const fn _mm512_moveldup_ps(a: __m512) -> __m512 {
26092    unsafe {
26093        let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26094        transmute(r)
26095    }
26096}
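
// Scalar sketch (ours) of the duplication pattern above: each even-indexed
// element is copied into itself and its odd-indexed neighbor.
#[cfg(test)]
#[allow(dead_code)]
fn _moveldup_model(a: [f32; 16]) -> [f32; 16] {
    let mut out = [0.0f32; 16];
    for i in 0..8 {
        out[2 * i] = a[2 * i];
        out[2 * i + 1] = a[2 * i];
    }
    out
}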
26097
26098/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26104#[cfg_attr(test, assert_instr(vmovsldup))]
26105#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26106pub const fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
26107    unsafe {
26108        let mov: f32x16 =
26109            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26110        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
26111    }
26112}
26113
26114/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26115///
26116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
26117#[inline]
26118#[target_feature(enable = "avx512f")]
26119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26120#[cfg_attr(test, assert_instr(vmovsldup))]
26121#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26122pub const fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
26123    unsafe {
26124        let mov: f32x16 =
26125            simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
26126        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
26127    }
26128}
26129
26130/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26131///
26132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
26133#[inline]
26134#[target_feature(enable = "avx512f,avx512vl")]
26135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26136#[cfg_attr(test, assert_instr(vmovsldup))]
26137#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26138pub const fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
26139    unsafe {
26140        let mov = _mm256_moveldup_ps(a);
26141        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
26142    }
26143}
26144
26145/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26146///
26147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
26148#[inline]
26149#[target_feature(enable = "avx512f,avx512vl")]
26150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26151#[cfg_attr(test, assert_instr(vmovsldup))]
26152#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26153pub const fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
26154    unsafe {
26155        let mov = _mm256_moveldup_ps(a);
26156        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
26157    }
26158}
26159
26160/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26161///
26162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
26163#[inline]
26164#[target_feature(enable = "avx512f,avx512vl")]
26165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26166#[cfg_attr(test, assert_instr(vmovsldup))]
26167#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26168pub const fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
26169    unsafe {
26170        let mov = _mm_moveldup_ps(a);
26171        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
26172    }
26173}
26174
26175/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26176///
26177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
26178#[inline]
26179#[target_feature(enable = "avx512f,avx512vl")]
26180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26181#[cfg_attr(test, assert_instr(vmovsldup))]
26182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26183pub const fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
26184    unsafe {
26185        let mov = _mm_moveldup_ps(a);
26186        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
26187    }
26188}
26189
26190/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
26191///
26192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
26193#[inline]
26194#[target_feature(enable = "avx512f")]
26195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26196#[cfg_attr(test, assert_instr(vmovshdup))]
26197#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26198pub const fn _mm512_movehdup_ps(a: __m512) -> __m512 {
26199    unsafe {
26200        let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26201        transmute(r)
26202    }
26203}
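
// Scalar sketch (ours), mirroring `_moveldup_model` above: each odd-indexed
// element is copied into itself and its even-indexed neighbor.
#[cfg(test)]
#[allow(dead_code)]
fn _movehdup_model(a: [f32; 16]) -> [f32; 16] {
    let mut out = [0.0f32; 16];
    for i in 0..8 {
        out[2 * i] = a[2 * i + 1];
        out[2 * i + 1] = a[2 * i + 1];
    }
    out
}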
26204
26205/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26206///
26207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
26208#[inline]
26209#[target_feature(enable = "avx512f")]
26210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26211#[cfg_attr(test, assert_instr(vmovshdup))]
26212#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
26213pub const fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
26214    unsafe {
26215        let mov: f32x16 =
26216            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
26217        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
26218    }
26219}
26220
26221/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26222///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 =
            simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
    }
}

/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
    }
}
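
// Example: a minimal sketch of the masked `movehdup` family (the values and
// the `demo` scaffolding are illustrative only, not part of this crate):
//
//     #[target_feature(enable = "avx512f,avx512vl")]
//     unsafe fn demo() {
//         let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
//         // Each odd-indexed element also overwrites the even slot below it:
//         // [1.0, 1.0, 3.0, 3.0].
//         let _dup = _mm_movehdup_ps(a);
//         // Zeromask 0b0011 keeps only the low two result lanes:
//         // [1.0, 1.0, 0.0, 0.0].
//         let _masked = _mm_maskz_movehdup_ps(0b0011, a);
//     }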

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_movedup_pd(a: __m512d) -> __m512d {
    unsafe {
        let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(r)
    }
}
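
// Example: a sketch of the even-index duplication performed by
// `_mm512_movedup_pd` (illustrative values; `demo` is not part of this crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//         // Each even-indexed element also fills the odd slot above it:
//         // [0.0, 0.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0].
//         let _r = _mm512_movedup_pd(a);
//     }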

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
    }
}

/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i32x16();
        let b = _mm512_castsi128_si512(b).as_i32x16();
        let ret: i32x16 = match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        };
        transmute(ret)
    }
}
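
// Example: how IMM8 picks the 128-bit destination lane in
// `_mm512_inserti32x4` (illustrative values; `demo` is not part of this crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_set1_epi32(0);
//         let b = _mm_set1_epi32(7);
//         // IMM8 = 2 overwrites elements 8..12 (the third 128-bit lane),
//         // matching the `2 =>` shuffle arm above.
//         let _r = _mm512_inserti32x4::<2>(a, b);
//     }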

/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_inserti32x4<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_inserti32x4<const IMM8: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
)]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x8();
        let b = _mm256_castsi128_si256(b).as_i32x8();
        let ret: i32x8 = match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        };
        transmute(ret)
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_inserti32x4<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_inserti32x4<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}

/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_castsi256_si512(b);
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
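
// Example: with only one index bit, `_mm512_inserti64x4` targets either the
// low or the high 256 bits (illustrative values; `demo` is not part of this
// crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_set1_epi64(0);
//         let b = _mm256_set1_epi64x(9);
//         // IMM8 = 1 replaces elements 4..8 (the upper 256 bits) with `b`.
//         let _r = _mm512_inserti64x4::<1>(a, b);
//     }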

/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_inserti64x4<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_inserti64x4<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_castps128_ps512(b);
        match IMM8 & 0b11 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            1 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
                )
            }
            2 => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
                )
            }
            _ => {
                simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
            }
        }
    }
}
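
// Example: `_mm512_insertf32x4` follows the same lane selection as its integer
// counterpart (illustrative values; `demo` is not part of this crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_set1_ps(0.0);
//         let b = _mm_set1_ps(1.5);
//         // IMM8 = 0 overwrites elements 0..4 (the lowest 128-bit lane).
//         let _r = _mm512_insertf32x4::<0>(a, b);
//     }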

/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_insertf32x4<const IMM8: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m128,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_insertf32x4<const IMM8: i32>(
    k: __mmask16,
    a: __m512,
    b: __m128,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let r = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    test,
    assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
)]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_castps128_ps256(b);
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_insertf32x4<const IMM8: i32>(
    src: __m256,
    k: __mmask8,
    a: __m256,
    b: __m128,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_insertf32x4<const IMM8: i32>(
    k: __mmask8,
    a: __m256,
    b: __m128,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
    }
}

/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_castpd256_pd512(b);
        match IMM8 & 0b1 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
        }
    }
}
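
// Example: `_mm512_insertf64x4` replaces one 256-bit half (illustrative
// values; `demo` is not part of this crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_set1_pd(0.0);
//         let b = _mm256_set1_pd(2.5);
//         // IMM8 = 1: the upper four elements become 2.5.
//         let _r = _mm512_insertf64x4::<1>(a, b);
//     }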

/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_insertf64x4<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m256d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_insertf64x4<const IMM8: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m256d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let r = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        #[rustfmt::skip]
        let r: i32x16 = simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
              2 + 4, 18 + 4, 3 + 4, 19 + 4,
              2 + 8, 18 + 8, 3 + 8, 19 + 8,
              2 + 12, 18 + 12, 3 + 12, 19 + 12],
        );
        transmute(r)
    }
}
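
// Example: the interleave above is per 128-bit lane, not across the whole
// vector (illustrative values; `demo` is not part of this crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//         let b = _mm512_set1_epi32(-1);
//         // Lane 0 of the result interleaves a[2..4] with b[2..4]: [2, -1, 3, -1];
//         // the other three lanes follow the same pattern with their own halves.
//         let _r = _mm512_unpackhi_epi32(a, b);
//     }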

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
    }
}

/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}
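
// Example: odd-indexed (high) 64-bit elements of each 128-bit lane are
// interleaved (illustrative values; `demo` is not part of this crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//         let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
//         // Result: [1, 11, 3, 13, 5, 15, 7, 17].
//         let _r = _mm512_unpackhi_epi64(a, b);
//     }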

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
    }
}

/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        #[rustfmt::skip]
        simd_shuffle!(
            a, b,
            [ 2, 18, 3, 19,
              2 + 4, 18 + 4, 3 + 4, 19 + 4,
              2 + 8, 18 + 8, 3 + 8, 19 + 8,
              2 + 12, 18 + 12, 3 + 12, 19 + 12],
        )
    }
}
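
// Example: the float variant uses the same per-lane pattern as
// `_mm512_unpackhi_epi32` (illustrative values; `demo` is not part of this
// crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_setr_ps(
//             0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//             8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//         );
//         let b = _mm512_set1_ps(-1.0);
//         // Lane 0 of the result is [2.0, -1.0, 3.0, -1.0], and so on per lane.
//         let _r = _mm512_unpackhi_ps(a, b);
//     }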

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
}
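
// Example: per 128-bit lane, the high double of `a` pairs with the high double
// of `b` (illustrative values; `demo` is not part of this crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//         let b = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
//         // Result: [1.0, 11.0, 3.0, 13.0, 5.0, 15.0, 7.0, 17.0].
//         let _r = _mm512_unpackhi_pd(a, b);
//     }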

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        #[rustfmt::skip]
        let r: i32x16 = simd_shuffle!(
            a, b,
            [ 0, 16, 1, 17,
              0 + 4, 16 + 4, 1 + 4, 17 + 4,
              0 + 8, 16 + 8, 1 + 8, 17 + 8,
              0 + 12, 16 + 12, 1 + 12, 17 + 12],
        );
        transmute(r)
    }
}
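
// Example: `unpacklo` mirrors `unpackhi` but draws from the low half of each
// 128-bit lane (illustrative values; `demo` is not part of this crate):
//
//     #[target_feature(enable = "avx512f")]
//     unsafe fn demo() {
//         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//         let b = _mm512_set1_epi32(-1);
//         // Lane 0 of the result is [0, -1, 1, -1]; each lane interleaves its
//         // own two low elements of `a` and `b`.
//         let _r = _mm512_unpacklo_epi32(a, b);
//     }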
27306
27307/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27308///
27309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
    }
}

/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
    }
}

/// Unpack and interleave 32-bit integers from the low half of a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
    }
}

/// Unpack and interleave 32-bit integers from the low half of a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
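///
/// # Examples
///
/// A sketch of the 64-bit interleave (`no_run`; assumes AVX512F at runtime):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///         let b = _mm512_set1_epi64(-1);
///         let r = _mm512_unpacklo_epi64(a, b);
///         // Each 128-bit lane contributes its even-indexed element of `a`,
///         // paired with the matching element of `b`.
///         let lanes: [i64; 8] = core::mem::transmute(r);
///         assert_eq!(lanes, [0, -1, 2, -1, 4, -1, 6, -1]);
///     }
/// }
/// ```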
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
    }
}

/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
    }
}

/// Unpack and interleave 64-bit integers from the low half of a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
    }
}

/// Unpack and interleave 64-bit integers from the low half of a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
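///
/// # Examples
///
/// A sketch of the single-precision interleave (`no_run`; assumes AVX512F at runtime):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm512_set1_ps(1.0);
///         let b = _mm512_set1_ps(2.0);
///         let r = _mm512_unpacklo_ps(a, b);
///         let lanes: [f32; 16] = core::mem::transmute(r);
///         assert_eq!(&lanes[..4], &[1.0, 2.0, 1.0, 2.0]);
///     }
/// }
/// ```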
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        #[rustfmt::skip]
        simd_shuffle!(a, b,
                       [ 0, 16, 1, 17,
                         0 + 4, 16 + 4, 1 + 4, 17 + 4,
                         0 + 8, 16 + 8, 1 + 8, 17 + 8,
                         0 + 12, 16 + 12, 1 + 12, 17 + 12],
        )
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
    }
}

/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
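///
/// # Examples
///
/// A sketch of the double-precision interleave (`no_run`; assumes AVX512F at runtime):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///         let b = _mm512_set1_pd(-1.0);
///         let r = _mm512_unpacklo_pd(a, b);
///         let lanes: [f64; 8] = core::mem::transmute(r);
///         assert_eq!(lanes, [0.0, -1.0, 2.0, -1.0, 4.0, -1.0, 6.0, -1.0]);
///     }
/// }
/// ```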
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
    }
}

/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
    }
}

/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
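///
/// # Examples
///
/// A sketch (`no_run`; assumes AVX512F at runtime). Only the low 128 bits of
/// the result are defined, so the example round-trips through
/// `_mm512_castps512_ps128` and inspects just those bits:
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
///         let wide = _mm512_castps128_ps512(a);
///         // The upper 384 bits are indeterminate; look only at the low lane.
///         let low: [f32; 4] = core::mem::transmute(_mm512_castps512_ps128(wide));
///         assert_eq!(low, [1.0, 2.0, 3.0, 4.0]);
///     }
/// }
/// ```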
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps128_ps512(a: __m128) -> __m512 {
    unsafe {
        simd_shuffle!(
            a,
            _mm_undefined_ps(),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}

/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps256_ps512(a: __m256) -> __m512 {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_undefined_ps(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}

/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
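///
/// # Examples
///
/// A sketch contrasting this with the plain cast (`no_run`; assumes AVX512F at
/// runtime): here the upper 384 bits are guaranteed to be zero:
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
///         let wide: [f32; 16] = core::mem::transmute(_mm512_zextps128_ps512(a));
///         assert_eq!(&wide[..4], &[1.0, 2.0, 3.0, 4.0]);
///         assert_eq!(&wide[4..], &[0.0; 12][..]);
///     }
/// }
/// ```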
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
    unsafe {
        simd_shuffle!(
            a,
            _mm_set1_ps(0.),
            [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        )
    }
}

/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
    unsafe {
        simd_shuffle!(
            a,
            _mm256_set1_ps(0.),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
        )
    }
}

/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps128(a: __m512) -> __m128 {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}

/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps512_ps256(a: __m512) -> __m256 {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
}

/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps_pd(a: __m512) -> __m512d {
    unsafe { transmute(a) }
}

/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castps_si512(a: __m512) -> __m512i {
    unsafe { transmute(a) }
}

/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
    unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}

/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
    unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}

/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
    unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
}

/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
    unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
}

/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}

/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}

/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    unsafe { transmute(a) }
}

/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    unsafe { transmute(a) }
}

/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
    unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}

/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are indeterminate.
///
/// In the Intel documentation, the upper bits are declared to be "undefined".
/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
///
/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
    unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}

/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
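///
/// # Examples
///
/// A sketch (`no_run`; assumes AVX512F at runtime); the upper 384 bits come
/// back as zeros, unlike with `_mm512_castsi128_si512`:
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm_setr_epi32(1, 2, 3, 4);
///         let wide: [i32; 16] = core::mem::transmute(_mm512_zextsi128_si512(a));
///         assert_eq!(wide, [1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
///     }
/// }
/// ```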
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
    unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
}

/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
    unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
}

/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
    unsafe { simd_shuffle!(a, a, [0, 1]) }
}

/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
}

/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    unsafe { transmute(a) }
}

/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    unsafe { transmute(a) }
}

/// Copy the lower 32-bit integer in a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
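///
/// # Examples
///
/// A sketch (`no_run`; assumes AVX512F at runtime):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm512_setr_epi32(7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///         // Only the lowest 32-bit element is returned.
///         assert_eq!(_mm512_cvtsi512_si32(a), 7);
///     }
/// }
/// ```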
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
    unsafe { simd_extract!(a.as_i32x16(), 0) }
}

/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtss_f32(a: __m512) -> f32 {
    unsafe { simd_extract!(a, 0) }
}

/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
    unsafe { simd_extract!(a, 0) }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
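///
/// # Examples
///
/// A sketch of the broadcast (`no_run`; assumes AVX512F at runtime):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm_setr_epi32(42, 1, 2, 3);
///         // Element 0 of `a` is replicated to all sixteen lanes.
///         let r: [i32; 16] = core::mem::transmute(_mm512_broadcastd_epi32(a));
///         assert_eq!(r, [42; 16]);
///     }
/// }
/// ```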
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
    unsafe {
        let a = _mm512_castsi128_si512(a).as_i32x16();
        let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        transmute(ret)
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
    }
}

/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
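///
/// # Examples
///
/// A sketch of the 64-bit broadcast (`no_run`; assumes AVX512F at runtime):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm_set1_epi64x(7);
///         let r: [i64; 8] = core::mem::transmute(_mm512_broadcastq_epi64(a));
///         assert_eq!(r, [7; 8]);
///     }
/// }
/// ```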
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
    }
}

/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
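///
/// # Examples
///
/// A sketch of the single-precision broadcast (`no_run`; assumes AVX512F at runtime):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// fn demo() {
///     use core::arch::x86_64::*;
///     // SAFETY: the caller must ensure AVX512F is available.
///     unsafe {
///         let a = _mm_set_ss(3.5);
///         let r: [f32; 16] = core::mem::transmute(_mm512_broadcastss_ps(a));
///         assert_eq!(r, [3.5; 16]);
///     }
/// }
/// ```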
28300#[inline]
28301#[target_feature(enable = "avx512f")]
28302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28303#[cfg_attr(test, assert_instr(vbroadcastss))]
28304#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28305pub const fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
28306    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
28307}
28308
28309/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28310///
28311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
28312#[inline]
28313#[target_feature(enable = "avx512f")]
28314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28315#[cfg_attr(test, assert_instr(vbroadcastss))]
28316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28317pub const fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
28318    unsafe {
28319        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
28320        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
28321    }
28322}
28323
28324/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28325///
28326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
28327#[inline]
28328#[target_feature(enable = "avx512f")]
28329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28330#[cfg_attr(test, assert_instr(vbroadcastss))]
28331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
28332pub const fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
28333    unsafe {
28334        let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
28335        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
28336    }
28337}
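
// Sketch for illustration only (a hypothetical test helper, not upstream
// code): contrasts the writemask and zeromask behavior of the 512-bit
// broadcastss intrinsics above, assuming AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_broadcastss_ps() {
    let src = _mm512_set1_ps(-1.0);
    let a = _mm_set_ps(4.0, 3.0, 2.0, 0.5); // the low element is 0.5
    let masked = _mm512_mask_broadcastss_ps(src, 0b0000_0000_0000_0011, a);
    let lanes: [f32; 16] = unsafe { transmute(masked) };
    assert_eq!(lanes[0], 0.5); // mask bit set: broadcast value
    assert_eq!(lanes[2], -1.0); // mask bit clear: copied from src
    let zeroed = _mm512_maskz_broadcastss_ps(0b0000_0000_0000_0011, a);
    let lanes: [f32; 16] = unsafe { transmute(zeroed) };
    assert_eq!(lanes[2], 0.0); // mask bit clear: zeroed
}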

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
    }
}

/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let broadcast = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
    }
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
    unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
    }
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
    }
}
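
// Minimal sketch (hypothetical test helper, not upstream code): shows the
// writemask form of the 512-bit broadcastsd above, assuming AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_mask_broadcastsd_pd() {
    let src = _mm512_set1_pd(9.0);
    let a = _mm_set_pd(2.0, 1.5); // the low element is 1.5
    // The low four mask bits are set: lanes 0..4 get the broadcast value,
    // lanes 4..8 keep the corresponding elements of src.
    let r = _mm512_mask_broadcastsd_pd(src, 0b0000_1111, a);
    let lanes: [f64; 8] = unsafe { transmute(r) };
    assert_eq!(lanes, [1.5, 1.5, 1.5, 1.5, 9.0, 9.0, 9.0, 9.0]);
}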

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
    }
}

/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i32x4();
        let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
    }
}
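
// Illustrative sketch (hypothetical test helper, not upstream code): unlike
// broadcastss/broadcastsd, the i32x4 form repeats the whole 128-bit block,
// not a single lane. Assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_broadcast_i32x4() {
    let a = _mm_set_epi32(3, 2, 1, 0); // lanes 0..4 are 0, 1, 2, 3
    let r = _mm512_broadcast_i32x4(a);
    let lanes: [i32; 16] = unsafe { transmute(r) };
    // The 128-bit source block repeats in all four 128-bit lanes of dst.
    assert_eq!(lanes, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
}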

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i32x4();
        let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
    }
}

/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
    }
}

/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
}

/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
    }
}

/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
    }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
    }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
    }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
    }
}

/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
    }
}

/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
    unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
}

/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
    }
}

/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
    unsafe {
        let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
    }
}
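
// Illustrative sketch (hypothetical test helper, not upstream code): the
// f64x4 block broadcast combined with a zeromask, assuming AVX-512F. The
// chosen mask keeps only the first copy of the 256-bit block.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_maskz_broadcast_f64x4() {
    let a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0); // lanes 0..4 are 1.0..=4.0
    // Only the low four mask bits are set: the upper half of dst is zeroed.
    let r = _mm512_maskz_broadcast_f64x4(0b0000_1111, a);
    let lanes: [f64; 8] = unsafe { transmute(r) };
    assert_eq!(lanes, [1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0, 0.0]);
}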

/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
}
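
// Minimal sketch (hypothetical test helper, not upstream code): in the blend
// family a set mask bit selects the element from b and a clear bit selects
// the element from a. Assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_mask_blend_epi32() {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    // The high eight mask bits are set: lanes 8..16 come from b.
    let r = _mm512_mask_blend_epi32(0b1111_1111_0000_0000, a, b);
    let lanes: [i32; 16] = unsafe { transmute(r) };
    assert_eq!(lanes, [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2]);
}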

/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
}

/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
}

/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
}

/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
}

/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
}

/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
}

/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
}

/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
}

/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
}

/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
}

/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
}
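
// Minimal sketch (hypothetical test helper, not upstream code): the same
// blend rule on the smallest floating-point vector, assuming AVX-512F+VL.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn demo_mask_blend_pd() {
    let a = _mm_set_pd(10.0, 20.0); // lanes: [20.0, 10.0]
    let b = _mm_set_pd(30.0, 40.0); // lanes: [40.0, 30.0]
    // Bit 0 clear: lane 0 from a; bit 1 set: lane 1 from b.
    let r = _mm_mask_blend_pd(0b10, a, b);
    let lanes: [f64; 2] = unsafe { transmute(r) };
    assert_eq!(lanes, [20.0, 30.0]);
}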

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
///
/// <div class="warning">Only lowest <strong>4 bits</strong> are used from the mask (shift at maximum by 60 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x16();
        let b = b.as_i32x16();
        let imm8: i32 = IMM8 % 16;
        let r: i32x16 = match imm8 {
            0 => simd_shuffle!(
                a,
                b,
                [
                    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
                ],
            ),
            1 => simd_shuffle!(
                a,
                b,
                [
                    17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
                ],
            ),
            2 => simd_shuffle!(
                a,
                b,
                [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
            ),
            3 => simd_shuffle!(
                a,
                b,
                [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
            ),
            4 => simd_shuffle!(
                a,
                b,
                [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
            ),
            5 => simd_shuffle!(
                a,
                b,
                [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
            ),
            6 => simd_shuffle!(
                a,
                b,
                [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
            ),
            7 => simd_shuffle!(
                a,
                b,
                [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
            ),
            8 => simd_shuffle!(
                a,
                b,
                [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
            ),
            9 => simd_shuffle!(
                a,
                b,
                [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
            ),
            10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
            11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
            12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
            13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
            14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
            15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
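
// Illustrative sketch (hypothetical test helper, not upstream code): in the
// shuffle indices above, 16..31 select from b and 0..15 select from a, so
// dst reads the concatenation [a:b] starting IMM8 elements into b. Assumes
// AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_alignr_epi32() {
    let a = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
    let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // Shift the concatenation right by one 32-bit element: dst starts at
    // b[1] and its last element is a[0].
    let r = _mm512_alignr_epi32::<1>(a, b);
    let lanes: [i32; 16] = unsafe { transmute(r) };
    assert_eq!(lanes, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
}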

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_alignr_epi32<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
    }
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_alignr_epi32<const IMM8: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
///
/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the mask (shift at maximum by 28 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x8();
        let b = b.as_i32x8();
        let imm8: i32 = IMM8 % 8;
        let r: i32x8 = match imm8 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_alignr_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_alignr_epi32<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
///
/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 12 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_i32x4();
        let b = b.as_i32x4();
        let imm8: i32 = IMM8 % 4;
        let r: i32x4 = match imm8 {
            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_alignr_epi32<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_alignr_epi32<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
    }
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
///
/// <div class="warning">Only lowest <strong>3 bits</strong> are used from the mask (shift at maximum by 56 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let imm8: i32 = IMM8 % 8;
        let r: i64x8 = match imm8 {
            0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
            1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
            2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
            3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
            4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
            5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
            6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
            7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}
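
// Illustrative sketch (hypothetical test helper, not upstream code): the
// 64-bit variant follows the same [a:b] concatenation rule as alignr_epi32,
// just with 8 lanes. Assumes AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn demo_alignr_epi64() {
    let a = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    // Shift the concatenation right by two 64-bit elements: dst starts at
    // b[2] and ends with a[0], a[1].
    let r = _mm512_alignr_epi64::<2>(a, b);
    let lanes: [i64; 8] = unsafe { transmute(r) };
    assert_eq!(lanes, [2, 3, 4, 5, 6, 7, 8, 9]);
}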

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_alignr_epi64<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
    }
}

/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_alignr_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
///
/// <div class="warning">Only lowest <strong>2 bits</strong> are used from the mask (shift at maximum by 24 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let imm8: i32 = IMM8 % 4;
        let r: i64x4 = match imm8 {
            0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
            2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
            3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_alignr_epi64<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
    }
}

/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_alignr_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
///
/// <div class="warning">Only lowest <strong>bit</strong> is used from the mask (shift at maximum by 8 bytes)!</div>
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
#[rustc_legacy_const_generics(2)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let imm8: i32 = IMM8 % 2;
        let r: i64x2 = match imm8 {
            0 => simd_shuffle!(a, b, [2, 3]),
            1 => simd_shuffle!(a, b, [3, 0]),
            _ => unreachable_unchecked(),
        };
        transmute(r)
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_alignr_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
    }
}

/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_alignr_epi64<const IMM8: i32>(
    k: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let r = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
    }
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generates vpandq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
}

/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let and = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
    }
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let and = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
    }
}
29380
29381/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29382///
29383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
29384#[inline]
29385#[target_feature(enable = "avx512f,avx512vl")]
29386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29387#[cfg_attr(test, assert_instr(vpandd))]
29388#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29389pub const fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29390    unsafe {
29391        let and = simd_and(a.as_i32x8(), b.as_i32x8());
29392        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
29393    }
29394}
29395
29396/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
29399#[inline]
29400#[target_feature(enable = "avx512f,avx512vl")]
29401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29402#[cfg_attr(test, assert_instr(vpandd))]
29403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29404pub const fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
29405    unsafe {
29406        let and = simd_and(a.as_i32x8(), b.as_i32x8());
29407        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
29408    }
29409}
29410
29411/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29412///
29413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
29414#[inline]
29415#[target_feature(enable = "avx512f,avx512vl")]
29416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29417#[cfg_attr(test, assert_instr(vpandd))]
29418#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29419pub const fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29420    unsafe {
29421        let and = simd_and(a.as_i32x4(), b.as_i32x4());
29422        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
29423    }
29424}
29425
29426/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29427///
29428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
29429#[inline]
29430#[target_feature(enable = "avx512f,avx512vl")]
29431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29432#[cfg_attr(test, assert_instr(vpandd))]
29433#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
29434pub const fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29435    unsafe {
29436        let and = simd_and(a.as_i32x4(), b.as_i32x4());
29437        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
29438    }
29439}
29440
29441/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
29442///
29443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
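///
/// # Example
///
/// A minimal sketch with hypothetical lane values (`ignore`d; needs an
/// `avx512f` CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(0xF0);
///     let b = _mm512_set1_epi64(0x3C);
///     let r = _mm512_and_epi64(a, b); // every 64-bit lane holds 0x30
/// }
/// ```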
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let and = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let and = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let and = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let and = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let and = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
    }
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let and = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
    }
}

/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
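///
/// # Example
///
/// A minimal sketch with hypothetical lane values (`ignore`d; needs an
/// `avx512f` CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0b1100);
///     let b = _mm512_set1_epi32(0b1010);
///     let r = _mm512_or_epi32(a, b); // every 32-bit lane holds 0b1110
/// }
/// ```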
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let or = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
    }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let or = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
    }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] // should be vpord
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let or = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
    }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let or = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
    }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] // should be vpord
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let or = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
    }
}

/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let or = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
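///
/// # Example
///
/// A minimal sketch with hypothetical lane values (`ignore`d; needs an
/// `avx512f` CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(0xF0);
///     let b = _mm512_set1_epi64(0x0F);
///     let r = _mm512_or_epi64(a, b); // every 64-bit lane holds 0xFF
/// }
/// ```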
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let or = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let or = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] // should be vporq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let or = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let or = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vor))] // should be vporq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let or = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
    }
}

/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let or = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
    }
}

/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vporq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
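///
/// # Example
///
/// A minimal sketch with hypothetical lane values (`ignore`d; needs an
/// `avx512f` CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0b1100);
///     let b = _mm512_set1_epi32(0b1010);
///     let r = _mm512_xor_epi32(a, b); // every 32-bit lane holds 0b0110
/// }
/// ```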
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))] // should be vpxord
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let xor = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
    }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let xor = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
    }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxor))] // should be vpxord
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let xor = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
    }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let xor = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxor))] // should be vpxord
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let xor = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
    }
}

/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxord))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let xor = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let xor = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let xor = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxor))] // should be vpxorq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let xor = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let xor = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vxor))] // should be vpxorq
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let xor = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
    }
}

/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let xor = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
    }
}

/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpxorq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
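///
/// # Example
///
/// A sketch of the NOT-then-AND semantics (hypothetical values; `ignore`d
/// because it needs an `avx512f` CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0b1100);
///     let b = _mm512_set1_epi32(0b1010);
///     // Each lane computes (!a) & b, so !0b1100 & 0b1010 = 0b0010.
///     let r = _mm512_andnot_epi32(a, b);
/// }
/// ```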
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))] // should be vpandnd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_epi32(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_epi32(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
    }
}

/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
    }
}

/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_epi64(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_epi64(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __m256i {
    unsafe {
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
    }
}

/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
    }
}

/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}

/// Convert 16-bit mask a into an integer value, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
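///
/// # Example
///
/// A sketch of the (purely value-preserving) conversion; the mask value is
/// hypothetical and the CPU must support `avx512f`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let k: __mmask16 = 0b1010_1010_1010_1010;
///     let x = _cvtmask16_u32(k); // x == 0xAAAA
/// }
/// ```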
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtmask16_u32(a: __mmask16) -> u32 {
    a as u32
}

/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _cvtu32_mask16(a: u32) -> __mmask16 {
    a as __mmask16
}

/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
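///
/// # Example
///
/// A sketch with hypothetical mask values (`ignore`d; needs an `avx512f`
/// CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a: __mmask16 = 0b1100_1100_1100_1100;
///     let b: __mmask16 = 0b1010_1010_1010_1010;
///     let k = _kand_mask16(a, b); // k == 0b1000_1000_1000_1000
/// }
/// ```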
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generates normal and code instead of kandw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    a & b
}

/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(and))] // generates normal and code instead of kandw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
    a & b
}

/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generates normal or code instead of korw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    a | b
}

/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(or))] // generates normal or code instead of korw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
    a | b
}

/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generates normal xor code instead of kxorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    a ^ b
}

/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generates normal xor code instead of kxorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
    a ^ b
}

/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30427#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30428pub const fn _knot_mask16(a: __mmask16) -> __mmask16 {
30429    a ^ 0b11111111_11111111
30430}
30431
30432/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
30435#[inline]
30436#[target_feature(enable = "avx512f")]
30437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30438#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
30439pub const fn _mm512_knot(a: __mmask16) -> __mmask16 {
30440    a ^ 0b11111111_11111111
30441}
30442
/// Compute the bitwise NOT of 16-bit mask a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
}

/// Compute the bitwise NOT of 16-bit mask a and then AND with b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
}

/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_knot(_mm512_kxor(a, b))
}

/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_knot(_mm512_kxor(a, b))
}

/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
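///
/// # Examples
///
/// A minimal sketch (illustrative, assuming AVX-512F at runtime):
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let mut all_ones: u8 = 0;
///         // 0x00ff | 0xff00 == 0xffff: not zero, and every bit is set
///         let zf = _kortest_mask16_u8(0x00ff, 0xff00, &mut all_ones);
///         assert_eq!(zf, 0);
///         assert_eq!(all_ones, 1);
///     }
/// }
/// ```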
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
    let tmp = _kor_mask16(a, b);
    *all_ones = (tmp == 0xffff) as u8;
    (tmp == 0) as u8
}

/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
    (_kor_mask16(a, b) == 0xffff) as u8
}

/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
/// store 0 in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
    (_kor_mask16(a, b) == 0) as u8
}

/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
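///
/// # Examples
///
/// A minimal sketch (illustrative, assuming AVX-512F at runtime); the shift
/// count is a const generic:
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let k: __mmask16 = 0b0000_0000_1111_1111;
///         assert_eq!(_kshiftli_mask16::<4>(k), 0b0000_1111_1111_0000);
///     }
/// }
/// ```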
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
    a.unbounded_shl(COUNT)
}

/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
    a.unbounded_shr(COUNT)
}

/// Load 16-bit mask from memory
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
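///
/// # Examples
///
/// A minimal round-trip sketch together with [`_store_mask16`] (illustrative,
/// assuming AVX-512F at runtime):
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let mut slot: __mmask16 = 0;
///         _store_mask16(&mut slot, 0b1100_0011_0011_1100);
///         assert_eq!(_load_mask16(&slot), 0b1100_0011_0011_1100);
///     }
/// }
/// ```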
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
    *mem_addr
}

/// Store 16-bit mask to memory
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
    *mem_addr = a;
}

/// Copy 16-bit mask a to k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kmov(a: __mmask16) -> __mmask16 {
    a
}

/// Converts integer mask into bitmask, storing the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_int2mask(mask: i32) -> __mmask16 {
    mask as u16
}

/// Converts bit mask k1 into an integer value, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask2int(k1: __mmask16) -> i32 {
    k1 as i32
}

/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
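///
/// # Examples
///
/// A minimal sketch (illustrative, assuming AVX-512F at runtime); only the low
/// byte of each mask is used:
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a: __mmask16 = 0x1234; // low byte 0x34 becomes the high byte of k
///         let b: __mmask16 = 0xabcd; // low byte 0xcd becomes the low byte of k
///         assert_eq!(_mm512_kunpackb(a, b), 0x34cd);
///     }
/// }
/// ```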
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
    ((a & 0xff) << 8) | (b & 0xff)
}

/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
    let r = (a | b) == 0b11111111_11111111;
    r as i32
}

/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
    let r = (a | b) == 0;
    r as i32
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
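///
/// # Examples
///
/// A minimal sketch (illustrative, assuming AVX-512F at runtime):
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm512_set1_epi32(0b0101);
///         let b = _mm512_set1_epi32(0b0011);
///         // a & b == 0b0001 (non-zero) in every lane, so all 16 bits are set
///         assert_eq!(_mm512_test_epi32_mask(a, b), 0xffff);
///     }
/// }
/// ```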
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi32_mask(and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi32_mask(and, zero)
}

/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi32_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpneq_epi64_mask(and, zero)
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpneq_epi64_mask(and, zero)
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpneq_epi64_mask(and, zero)
}

/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpneq_epi64_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
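///
/// # Examples
///
/// A minimal sketch (illustrative, assuming AVX-512F at runtime):
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm512_set1_epi32(0b0101);
///         let b = _mm512_set1_epi32(0b1010);
///         // a & b == 0 in every lane, so all 16 bits are set
///         assert_eq!(_mm512_testn_epi32_mask(a, b), 0xffff);
///     }
/// }
/// ```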
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi32_mask(and, zero)
}

/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    let and = _mm512_and_epi32(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi32_mask(and, zero)
}

/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi32_mask(and, zero)
}

/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi32_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_cmpeq_epi64_mask(and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
    let and = _mm512_and_epi64(a, b);
    let zero = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_cmpeq_epi64_mask(and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    let and = _mm256_and_si256(a, b);
    let zero = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_cmpeq_epi64_mask(and, zero)
}

/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vptestnmq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    let and = _mm_and_si128(a, b);
    let zero = _mm_setzero_si128();
    _mm_mask_cmpeq_epi64_mask(k, and, zero)
}

/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
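///
/// # Examples
///
/// A minimal sketch (illustrative, assuming AVX-512F at runtime); the buffer
/// is 64-byte aligned and `_mm_sfence` is issued before the memory is read
/// back:
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         #[repr(align(64))]
///         struct Aligned([f32; 16]);
///
///         let mut out = Aligned([0.0; 16]);
///         _mm512_stream_ps(out.0.as_mut_ptr(), _mm512_set1_ps(1.0));
///         _mm_sfence();
///         assert_eq!(out.0, [1.0; 16]);
///     }
/// }
/// ```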
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    crate::arch::asm!(
        vps!("vmovntps", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntpd))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    crate::arch::asm!(
        vps!("vmovntpd", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
///
/// # Safety of non-temporal stores
///
/// After using this intrinsic, but before any other access to the memory that this intrinsic
/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
/// return.
///
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovntdq))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
    // see #1541, we should use inline asm to be sure, because LangRef isn't clear enough
    crate::arch::asm!(
        vps!("vmovntdq", ",{a}"),
        p = in(reg) mem_addr,
        a = in(zmm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
    let dst: __m512i;
    crate::arch::asm!(
        vpl!("vmovntdqa {a}"),
        a = out(zmm_reg) dst,
        p = in(reg) mem_addr,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_ps(
    e0: f32,
    e1: f32,
    e2: f32,
    e3: f32,
    e4: f32,
    e5: f32,
    e6: f32,
    e7: f32,
    e8: f32,
    e9: f32,
    e10: f32,
    e11: f32,
    e12: f32,
    e13: f32,
    e14: f32,
    e15: f32,
) -> __m512 {
    _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
    )
}

/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values in
/// reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr_ps(
    e0: f32,
    e1: f32,
    e2: f32,
    e3: f32,
    e4: f32,
    e5: f32,
    e6: f32,
    e7: f32,
    e8: f32,
    e9: f32,
    e10: f32,
    e11: f32,
    e12: f32,
    e13: f32,
    e14: f32,
    e15: f32,
) -> __m512 {
    unsafe {
        let r = f32x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        );
        transmute(r)
    }
}

/// Broadcast 64-bit float `a` to all elements of `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set1_pd(a: f64) -> __m512d {
    unsafe { transmute(f64x8::splat(a)) }
}

/// Broadcast 32-bit float `a` to all elements of `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set1_ps(a: f32) -> __m512 {
    unsafe { transmute(f32x16::splat(a)) }
}

/// Sets packed 32-bit integers in `dst` with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
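///
/// # Examples
///
/// A minimal sketch (illustrative, assuming AVX-512F at runtime); note that
/// `e0`, the last argument, lands in the lowest lane:
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let v = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
///         // the lowest 32-bit lane holds e0
///         assert_eq!(_mm512_cvtsi512_si32(v), 0);
///     }
/// }
/// ```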
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_epi32(
    e15: i32,
    e14: i32,
    e13: i32,
    e12: i32,
    e11: i32,
    e10: i32,
    e9: i32,
    e8: i32,
    e7: i32,
    e6: i32,
    e5: i32,
    e4: i32,
    e3: i32,
    e2: i32,
    e1: i32,
    e0: i32,
) -> __m512i {
    _mm512_setr_epi32(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Broadcast 8-bit integer a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set1_epi8(a: i8) -> __m512i {
    unsafe { transmute(i8x64::splat(a)) }
}

/// Broadcast 16-bit integer a to all elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set1_epi16(a: i16) -> __m512i {
    unsafe { transmute(i16x32::splat(a)) }
}

/// Broadcast 32-bit integer `a` to all elements of `dst`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set1_epi32(a: i32) -> __m512i {
    unsafe { transmute(i32x16::splat(a)) }
}

/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
    }
}

/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
    }
}

/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
    }
}

/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
    }
}

/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
    unsafe {
        let r = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
    }
}

/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
    unsafe {
        let r = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
    }
}

/// Broadcast 64-bit integer `a` to all elements of `dst`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set1_epi64(a: i64) -> __m512i {
    unsafe { transmute(i64x8::splat(a)) }
}

/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
    unsafe {
        let r = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
    unsafe {
        let r = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
    unsafe {
        let r = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
    }
}

/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
    unsafe {
        let r = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
    }
}

/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
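///
/// # Examples
///
/// A minimal sketch (illustrative, assuming AVX-512F at runtime):
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let v = _mm512_set4_epi64(4, 3, 2, 1);
///         // lanes from lowest to highest: 1, 2, 3, 4, 1, 2, 3, 4
///         let expected = _mm512_setr_epi64(1, 2, 3, 4, 1, 2, 3, 4);
///         assert_eq!(_mm512_cmpeq_epi64_mask(v, expected), 0xff);
///     }
/// }
/// ```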
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(d, c, b, a, d, c, b, a)
}

/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(a, b, c, d, a, b, c, d)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
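///
/// # Examples
///
/// A minimal sketch (illustrative, assuming AVX-512F at runtime):
///
/// ```
/// if is_x86_feature_detected!("avx512f") {
///     use std::arch::x86_64::*;
///     unsafe {
///         let a = _mm512_setr_ps(
///             0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///             8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
///         );
///         let b = _mm512_set1_ps(8.0);
///         // only lanes 0..=7 are less than 8.0
///         assert_eq!(_mm512_cmplt_ps_mask(a, b), 0b0000_0000_1111_1111);
///     }
/// }
/// ```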
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
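///
/// # Examples
///
/// A minimal sketch (an added illustration, not from the original source)
/// using one of the `_CMP_*` predicate constants; it assumes runtime
/// `avx512f` support, so the doctest is `ignore`d:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
///                            8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0);
///     let b = _mm512_set1_ps(8.0);
///     // _CMP_LT_OS: ordered, signaling less-than; lanes 0..=7 are below 8.0.
///     assert_eq!(_mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b), 0x00FF);
/// }
/// ```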
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
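///
/// # Examples
///
/// A minimal sketch (an added illustration, not from the original source);
/// it assumes runtime `avx512f` and `avx512vl` support, so the doctest is
/// `ignore`d:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///     let b = _mm256_set1_ps(4.5);
///     // Unlike the AVX _mm256_cmp_ps, this returns a bitmask, not a vector.
///     assert_eq!(_mm256_cmp_ps_mask::<_CMP_LE_OS>(a, b), 0b0000_1111);
/// }
/// ```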
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let r = vcmpps256(a, b, IMM8, neg_one);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f32x8();
        let b = b.as_f32x8();
        let r = vcmpps256(a, b, IMM8, k1 as i8);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vcmpps128(a, b, IMM8, neg_one);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vcmpps128(a, b, IMM8, k1 as i8);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
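///
/// # Examples
///
/// A minimal sketch (an added illustration, not from the original source)
/// of exception suppression; it assumes runtime `avx512f` support, so the
/// doctest is `ignore`d:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(f32::NAN);
///     let b = _mm512_set1_ps(1.0);
///     // _MM_FROUND_NO_EXC suppresses the invalid-operation exception the
///     // signaling less-than predicate would otherwise raise on NaN input.
///     let k = _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b);
///     assert_eq!(k, 0);
/// }
/// ```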
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
    a: __m512,
    b: __m512,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let neg_one = -1;
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vcmpps(a, b, IMM5, neg_one, SAE);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
    m: __mmask16,
    a: __m512,
    b: __m512,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x16();
        let b = b.as_f32x16();
        let r = vcmpps(a, b, IMM5, m as i16, SAE);
        r.cast_unsigned()
    }
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
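///
/// # Examples
///
/// A minimal sketch (an added illustration, not from the original source);
/// it assumes runtime `avx512f` support, so the doctest is `ignore`d:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_ps(1.0, f32::NAN, 3.0, f32::NAN, 5.0, 6.0, 7.0, 8.0,
///                            9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
///     let b = _mm512_set1_ps(0.0);
///     // A lane is "ordered" when neither input is NaN, so bits 1 and 3
///     // are clear in the result.
///     assert_eq!(_mm512_cmpord_ps_mask(a, b), 0xFFF5);
/// }
/// ```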
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
    _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
}

/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
    _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
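///
/// # Examples
///
/// A minimal sketch (an added illustration, not from the original source);
/// it assumes runtime `avx512f` support, so the doctest is `ignore`d:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///     let b = _mm512_set1_pd(4.0);
///     // Eight f64 lanes yield an 8-bit mask; _CMP_NLT_US is the
///     // unordered-signaling complement of less-than.
///     assert_eq!(_mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b), 0b1111_0000);
/// }
/// ```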
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let r = vcmppd256(a, b, IMM8, neg_one);
        r.cast_unsigned()
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f64x4();
        let b = b.as_f64x4();
        let r = vcmppd256(a, b, IMM8, k1 as i8);
        r.cast_unsigned()
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vcmppd128(a, b, IMM8, neg_one);
        r.cast_unsigned()
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vcmppd128(a, b, IMM8, k1 as i8);
        r.cast_unsigned()
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
    a: __m512d,
    b: __m512d,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let neg_one = -1;
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vcmppd(a, b, IMM5, neg_one, SAE);
        r.cast_unsigned()
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask8,
    a: __m512d,
    b: __m512d,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x8();
        let b = b.as_f64x8();
        let r = vcmppd(a, b, IMM5, k1 as i8, SAE);
        r.cast_unsigned()
    }
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
}

/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
}

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
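///
/// # Examples
///
/// A minimal sketch (an added illustration, not from the original source);
/// it assumes runtime `avx512f` support, so the doctest is `ignore`d:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(2.0);
///     // Only the lowest lane is compared; the result lives in bit 0.
///     assert_eq!(_mm_cmp_ss_mask::<_CMP_LT_OS>(a, b), 1);
/// }
/// ```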
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let r = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let r = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let neg_one = -1;
        let r = vcmpss(a, b, IMM5, neg_one, SAE);
        r.cast_unsigned()
    }
}

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask8,
    a: __m128,
    b: __m128,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let r = vcmpss(a, b, IMM5, k1 as i8, SAE);
        r.cast_unsigned()
    }
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let neg_one = -1;
        let r = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 5);
        let r = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
        r.cast_unsigned()
    }
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let neg_one = -1;
        let r = vcmpsd(a, b, IMM5, neg_one, SAE);
        r.cast_unsigned()
    }
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
    k1: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let r = vcmpsd(a, b, IMM5, k1 as i8, SAE);
        r.cast_unsigned()
    }
}

/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
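///
/// # Examples
///
/// A minimal sketch (an added illustration, not from the original source)
/// of the unsigned lane interpretation; it assumes runtime `avx512f`
/// support, so the doctest is `ignore`d:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // -1 reinterpreted as u32 is u32::MAX, so no lane is less than 1
///     // under the unsigned comparison.
///     let a = _mm512_set1_epi32(-1);
///     let b = _mm512_set1_epi32(1);
///     assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0);
/// }
/// ```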
32267#[inline]
32268#[target_feature(enable = "avx512f")]
32269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32270#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32271#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32272pub const fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32273    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
32274}
32275
32276/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32277///
32278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
32279#[inline]
32280#[target_feature(enable = "avx512f")]
32281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32282#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32283#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32284pub const fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32285    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
32286}
32287
32288/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32289///
32290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
32291#[inline]
32292#[target_feature(enable = "avx512f,avx512vl")]
32293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32294#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32295#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32296pub const fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32297    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
32298}
32299
32300/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32301///
32302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
32303#[inline]
32304#[target_feature(enable = "avx512f,avx512vl")]
32305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32306#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32307#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32308pub const fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32309    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
32310}
32311
32312/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
32313///
32314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
32315#[inline]
32316#[target_feature(enable = "avx512f,avx512vl")]
32317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32318#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32319#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32320pub const fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32321    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
32322}
32323
32324/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32325///
32326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
32327#[inline]
32328#[target_feature(enable = "avx512f,avx512vl")]
32329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32330#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32332pub const fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32333    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
32334}
32335
32336/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32337///
32338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
32339#[inline]
32340#[target_feature(enable = "avx512f")]
32341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32342#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32343#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32344pub const fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32345    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
32346}
32347
32348/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32349///
32350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
32351#[inline]
32352#[target_feature(enable = "avx512f")]
32353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32354#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32355#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32356pub const fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32357    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32358}
32359
32360/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32361///
32362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
32363#[inline]
32364#[target_feature(enable = "avx512f,avx512vl")]
32365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32366#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32367#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32368pub const fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32369    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
32370}
32371
32372/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32373///
32374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
32375#[inline]
32376#[target_feature(enable = "avx512f,avx512vl")]
32377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32378#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32379#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32380pub const fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32381    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32382}
32383
32384/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32385///
32386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
32387#[inline]
32388#[target_feature(enable = "avx512f,avx512vl")]
32389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32390#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32391#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32392pub const fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32393    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
32394}
32395
32396/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32397///
32398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
32399#[inline]
32400#[target_feature(enable = "avx512f,avx512vl")]
32401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32402#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32403#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32404pub const fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32405    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32406}
32407
32408/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32409///
32410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
32411#[inline]
32412#[target_feature(enable = "avx512f")]
32413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32414#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32415#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32416pub const fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32417    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
32418}
32419
32420/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32421///
32422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
32423#[inline]
32424#[target_feature(enable = "avx512f")]
32425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32426#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32427#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32428pub const fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32429    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
32430}
32431
32432/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32433///
32434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
32435#[inline]
32436#[target_feature(enable = "avx512f,avx512vl")]
32437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32438#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32439#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32440pub const fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32441    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
32442}
32443
32444/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32445///
32446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
32447#[inline]
32448#[target_feature(enable = "avx512f,avx512vl")]
32449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32450#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32451#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32452pub const fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32453    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
32454}
32455
32456/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32457///
32458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
32459#[inline]
32460#[target_feature(enable = "avx512f,avx512vl")]
32461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32462#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32463#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32464pub const fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32465    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
32466}
32467
32468/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32469///
32470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
32471#[inline]
32472#[target_feature(enable = "avx512f,avx512vl")]
32473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32474#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32475#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32476pub const fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32477    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
32478}
32479
32480/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32481///
32482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
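///
/// # Examples
///
/// A minimal sketch (not from Intel's documentation; assumes AVX512F, hence
/// `ignore`) highlighting the unsigned interpretation of the lanes:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // The bit pattern of -1 is u32::MAX when read as unsigned.
///     let a = _mm512_set1_epi32(-1);
///     let b = _mm512_set1_epi32(1);
///     // Unsigned compare: u32::MAX >= 1 in every lane.
///     let k = _mm512_cmpge_epu32_mask(a, b);
///     assert_eq!(k, 0xFFFF);
/// }
/// ```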
32483#[inline]
32484#[target_feature(enable = "avx512f")]
32485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32486#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32487#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32488pub const fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32489    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
32490}
32491
32492/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32493///
32494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
32495#[inline]
32496#[target_feature(enable = "avx512f")]
32497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32498#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32499#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32500pub const fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32501    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
32502}
32503
32504/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32505///
32506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
32507#[inline]
32508#[target_feature(enable = "avx512f,avx512vl")]
32509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32510#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32511#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32512pub const fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32513    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
32514}
32515
32516/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32517///
32518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
32519#[inline]
32520#[target_feature(enable = "avx512f,avx512vl")]
32521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32522#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32523#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32524pub const fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32525    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
32526}
32527
32528/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32529///
32530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
32531#[inline]
32532#[target_feature(enable = "avx512f,avx512vl")]
32533#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32534#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32535#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32536pub const fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32537    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
32538}
32539
32540/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32541///
32542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
32543#[inline]
32544#[target_feature(enable = "avx512f,avx512vl")]
32545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32546#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32547#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32548pub const fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32549    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
32550}
32551
32552/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32553///
32554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
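///
/// # Examples
///
/// A minimal sketch (illustrative; assumes AVX512F, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_set1_epi32(3);
///     // Only lane 3 compares equal, so only bit 3 of the mask is set.
///     let k = _mm512_cmpeq_epu32_mask(a, b);
///     assert_eq!(k, 1 << 3);
/// }
/// ```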
32555#[inline]
32556#[target_feature(enable = "avx512f")]
32557#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32558#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32559#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32560pub const fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32561    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
32562}
32563
32564/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32565///
32566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
32567#[inline]
32568#[target_feature(enable = "avx512f")]
32569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32570#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32571#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32572pub const fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32573    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
32574}
32575
32576/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32577///
32578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
32579#[inline]
32580#[target_feature(enable = "avx512f,avx512vl")]
32581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32582#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32584pub const fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32585    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
32586}
32587
32588/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32589///
32590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
32591#[inline]
32592#[target_feature(enable = "avx512f,avx512vl")]
32593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32594#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32595#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32596pub const fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32597    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
32598}
32599
32600/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
32601///
32602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
32603#[inline]
32604#[target_feature(enable = "avx512f,avx512vl")]
32605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32606#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32607#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32608pub const fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32609    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
32610}
32611
32612/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32613///
32614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
32615#[inline]
32616#[target_feature(enable = "avx512f,avx512vl")]
32617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32618#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32619#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32620pub const fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32621    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
32622}
32623
32624/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32625///
32626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
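///
/// # Examples
///
/// A minimal sketch (illustrative; assumes AVX512F, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let b = _mm512_set1_epi32(3);
///     // Every lane except lane 3 differs, so only bit 3 is clear.
///     let k = _mm512_cmpneq_epu32_mask(a, b);
///     assert_eq!(k, 0xFFF7);
/// }
/// ```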
32627#[inline]
32628#[target_feature(enable = "avx512f")]
32629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32630#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32631#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32632pub const fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32633    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
32634}
32635
32636/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32637///
32638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
32639#[inline]
32640#[target_feature(enable = "avx512f")]
32641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32642#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32643#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32644pub const fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32645    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
32646}
32647
32648/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32649///
32650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
32651#[inline]
32652#[target_feature(enable = "avx512f,avx512vl")]
32653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32654#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32655#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32656pub const fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32657    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
32658}
32659
32660/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32661///
32662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
32663#[inline]
32664#[target_feature(enable = "avx512f,avx512vl")]
32665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32666#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32667#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32668pub const fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32669    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
32670}
32671
32672/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
32673///
32674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
32675#[inline]
32676#[target_feature(enable = "avx512f,avx512vl")]
32677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32678#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32679#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32680pub const fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32681    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
32682}
32683
32684/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32685///
32686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
32687#[inline]
32688#[target_feature(enable = "avx512f,avx512vl")]
32689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32690#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
32691#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32692pub const fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32693    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
32694}
32695
32696/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32697///
32698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
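///
/// # Examples
///
/// A minimal sketch (illustrative; assumes AVX512F, hence `ignore`) using the
/// `_MM_CMPINT_*` constants to select the predicate:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(1);
///     let b = _mm512_set1_epi32(2);
///     // _MM_CMPINT_LT selects the unsigned less-than predicate.
///     let k = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
///     assert_eq!(k, 0xFFFF);
/// }
/// ```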
32699#[inline]
32700#[target_feature(enable = "avx512f")]
32701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32702#[rustc_legacy_const_generics(2)]
32703#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32704#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32705pub const fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32706    a: __m512i,
32707    b: __m512i,
32708) -> __mmask16 {
32709    unsafe {
32710        static_assert_uimm_bits!(IMM3, 3);
32711        let a = a.as_u32x16();
32712        let b = b.as_u32x16();
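        // IMM3 follows the _MM_CMPINT_* encoding: 0 = EQ, 1 = LT, 2 = LE,
        // 3 = FALSE, 4 = NE, 5 = NLT (>=), 6 = NLE (>), 7 = TRUE.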
32713        let r = match IMM3 {
32714            0 => simd_eq(a, b),
32715            1 => simd_lt(a, b),
32716            2 => simd_le(a, b),
32717            3 => i32x16::ZERO,
32718            4 => simd_ne(a, b),
32719            5 => simd_ge(a, b),
32720            6 => simd_gt(a, b),
32721            _ => i32x16::splat(-1),
32722        };
32723        simd_bitmask(r)
32724    }
32725}
32726
32727/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32728///
32729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
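///
/// # Examples
///
/// A minimal sketch (illustrative; assumes AVX512F, hence `ignore`) combining
/// a predicate constant with a zeromask:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(1);
///     let b = _mm512_set1_epi32(2);
///     // The unsigned less-than predicate holds everywhere, but the
///     // result is limited to the bits set in `k1`.
///     let k = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(0x0F0F, a, b);
///     assert_eq!(k, 0x0F0F);
/// }
/// ```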
32730#[inline]
32731#[target_feature(enable = "avx512f")]
32732#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32733#[rustc_legacy_const_generics(3)]
32734#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32735#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32736pub const fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32737    k1: __mmask16,
32738    a: __m512i,
32739    b: __m512i,
32740) -> __mmask16 {
32741    unsafe {
32742        static_assert_uimm_bits!(IMM3, 3);
32743        let a = a.as_u32x16();
32744        let b = b.as_u32x16();
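        // Expand the bitmask `k1` into a per-lane vector mask (all-ones or
        // all-zeros) so it can be AND-ed with each comparison result below.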
32745        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
32746        let r = match IMM3 {
32747            0 => simd_and(k1, simd_eq(a, b)),
32748            1 => simd_and(k1, simd_lt(a, b)),
32749            2 => simd_and(k1, simd_le(a, b)),
32750            3 => i32x16::ZERO,
32751            4 => simd_and(k1, simd_ne(a, b)),
32752            5 => simd_and(k1, simd_ge(a, b)),
32753            6 => simd_and(k1, simd_gt(a, b)),
32754            _ => k1,
32755        };
32756        simd_bitmask(r)
32757    }
32758}
32759
32760/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32761///
32762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
32763#[inline]
32764#[target_feature(enable = "avx512f,avx512vl")]
32765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32766#[rustc_legacy_const_generics(2)]
32767#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32768#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32769pub const fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32770    a: __m256i,
32771    b: __m256i,
32772) -> __mmask8 {
32773    unsafe {
32774        static_assert_uimm_bits!(IMM3, 3);
32775        let a = a.as_u32x8();
32776        let b = b.as_u32x8();
32777        let r = match IMM3 {
32778            0 => simd_eq(a, b),
32779            1 => simd_lt(a, b),
32780            2 => simd_le(a, b),
32781            3 => i32x8::ZERO,
32782            4 => simd_ne(a, b),
32783            5 => simd_ge(a, b),
32784            6 => simd_gt(a, b),
32785            _ => i32x8::splat(-1),
32786        };
32787        simd_bitmask(r)
32788    }
32789}
32790
32791/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32792///
32793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
32794#[inline]
32795#[target_feature(enable = "avx512f,avx512vl")]
32796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32797#[rustc_legacy_const_generics(3)]
32798#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32799#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32800pub const fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32801    k1: __mmask8,
32802    a: __m256i,
32803    b: __m256i,
32804) -> __mmask8 {
32805    unsafe {
32806        static_assert_uimm_bits!(IMM3, 3);
32807        let a = a.as_u32x8();
32808        let b = b.as_u32x8();
32809        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
32810        let r = match IMM3 {
32811            0 => simd_and(k1, simd_eq(a, b)),
32812            1 => simd_and(k1, simd_lt(a, b)),
32813            2 => simd_and(k1, simd_le(a, b)),
32814            3 => i32x8::ZERO,
32815            4 => simd_and(k1, simd_ne(a, b)),
32816            5 => simd_and(k1, simd_ge(a, b)),
32817            6 => simd_and(k1, simd_gt(a, b)),
32818            _ => k1,
32819        };
32820        simd_bitmask(r)
32821    }
32822}
32823
32824/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32825///
32826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
32827#[inline]
32828#[target_feature(enable = "avx512f,avx512vl")]
32829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32830#[rustc_legacy_const_generics(2)]
32831#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32832#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32833pub const fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32834    unsafe {
32835        static_assert_uimm_bits!(IMM3, 3);
32836        let a = a.as_u32x4();
32837        let b = b.as_u32x4();
32838        let r = match IMM3 {
32839            0 => simd_eq(a, b),
32840            1 => simd_lt(a, b),
32841            2 => simd_le(a, b),
32842            3 => i32x4::ZERO,
32843            4 => simd_ne(a, b),
32844            5 => simd_ge(a, b),
32845            6 => simd_gt(a, b),
32846            _ => i32x4::splat(-1),
32847        };
32848        simd_bitmask(r)
32849    }
32850}
32851
32852/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32853///
32854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
32855#[inline]
32856#[target_feature(enable = "avx512f,avx512vl")]
32857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32858#[rustc_legacy_const_generics(3)]
32859#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32860#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32861pub const fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
32862    k1: __mmask8,
32863    a: __m128i,
32864    b: __m128i,
32865) -> __mmask8 {
32866    unsafe {
32867        static_assert_uimm_bits!(IMM3, 3);
32868        let a = a.as_u32x4();
32869        let b = b.as_u32x4();
32870        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
32871        let r = match IMM3 {
32872            0 => simd_and(k1, simd_eq(a, b)),
32873            1 => simd_and(k1, simd_lt(a, b)),
32874            2 => simd_and(k1, simd_le(a, b)),
32875            3 => i32x4::ZERO,
32876            4 => simd_and(k1, simd_ne(a, b)),
32877            5 => simd_and(k1, simd_ge(a, b)),
32878            6 => simd_and(k1, simd_gt(a, b)),
32879            _ => k1,
32880        };
32881        simd_bitmask(r)
32882    }
32883}
32884
32885/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32886///
32887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
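///
/// # Examples
///
/// A minimal sketch (illustrative; assumes AVX512F, hence `ignore`)
/// highlighting the signed interpretation of the lanes:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-1);
///     let b = _mm512_set1_epi32(0);
///     // Signed compare: -1 < 0 in every lane (an unsigned compare of
///     // the same bits would yield an empty mask).
///     let k = _mm512_cmplt_epi32_mask(a, b);
///     assert_eq!(k, 0xFFFF);
/// }
/// ```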
32888#[inline]
32889#[target_feature(enable = "avx512f")]
32890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32891#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32892#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32893pub const fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32894    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
32895}
32896
32897/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32898///
32899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
32900#[inline]
32901#[target_feature(enable = "avx512f")]
32902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32903#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32904#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32905pub const fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32906    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
32907}
32908
32909/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
32912#[inline]
32913#[target_feature(enable = "avx512f,avx512vl")]
32914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32916#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32917pub const fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32918    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
32919}
32920
32921/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32922///
32923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
32924#[inline]
32925#[target_feature(enable = "avx512f,avx512vl")]
32926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32927#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32928#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32929pub const fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32930    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
32931}
32932
32933/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
32934///
32935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
32936#[inline]
32937#[target_feature(enable = "avx512f,avx512vl")]
32938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32939#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32941pub const fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
32942    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
32943}
32944
32945/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32946///
32947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
32948#[inline]
32949#[target_feature(enable = "avx512f,avx512vl")]
32950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32951#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32952#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32953pub const fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32954    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
32955}
32956
32957/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32958///
32959/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
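///
/// # Examples
///
/// A minimal sketch (illustrative; assumes AVX512F, hence `ignore`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(5);
///     let b = _mm512_set1_epi32(-5);
///     // Signed compare: 5 > -5 in every lane, even though the bit
///     // pattern of -5 is the larger value when read as unsigned.
///     let k = _mm512_cmpgt_epi32_mask(a, b);
///     assert_eq!(k, 0xFFFF);
/// }
/// ```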
32960#[inline]
32961#[target_feature(enable = "avx512f")]
32962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32963#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32964#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32965pub const fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
32966    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
32967}
32968
32969/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32970///
32971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
32972#[inline]
32973#[target_feature(enable = "avx512f")]
32974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32975#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32976#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32977pub const fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
32978    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
32979}
32980
32981/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
32982///
32983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
32984#[inline]
32985#[target_feature(enable = "avx512f,avx512vl")]
32986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32987#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
32988#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
32989pub const fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
32990    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
32991}
32992
32993/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32994///
32995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
32996#[inline]
32997#[target_feature(enable = "avx512f,avx512vl")]
32998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32999#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33000#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33001pub const fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33002    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
33003}
33004
33005/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
33006///
33007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
33008#[inline]
33009#[target_feature(enable = "avx512f,avx512vl")]
33010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33011#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33012#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33013pub const fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33014    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
33015}
33016
33017/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33018///
33019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
33020#[inline]
33021#[target_feature(enable = "avx512f,avx512vl")]
33022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33023#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33024#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33025pub const fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33026    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
33027}
33028
33029/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33030///
33031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
33032#[inline]
33033#[target_feature(enable = "avx512f")]
33034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33035#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33037pub const fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33038    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
33039}
33040
33041/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33042///
33043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
33044#[inline]
33045#[target_feature(enable = "avx512f")]
33046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33047#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33048#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33049pub const fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33050    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
33051}
33052
33053/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33054///
33055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
33056#[inline]
33057#[target_feature(enable = "avx512f,avx512vl")]
33058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33059#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33060#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33061pub const fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33062    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
33063}
33064
33065/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33066///
33067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
33068#[inline]
33069#[target_feature(enable = "avx512f,avx512vl")]
33070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33071#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33072#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33073pub const fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33074    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
33075}
33076
33077/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33078///
33079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
33080#[inline]
33081#[target_feature(enable = "avx512f,avx512vl")]
33082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33083#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33084#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33085pub const fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33086    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
33087}
33088
33089/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33090///
33091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
33092#[inline]
33093#[target_feature(enable = "avx512f,avx512vl")]
33094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33095#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33096#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33097pub const fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33098    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
33099}
33100
33101/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33102///
33103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
33104#[inline]
33105#[target_feature(enable = "avx512f")]
33106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33107#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33108#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33109pub const fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33110    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
33111}
33112
33113/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33114///
33115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
33116#[inline]
33117#[target_feature(enable = "avx512f")]
33118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33119#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33120#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33121pub const fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33122    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
33123}
33124
33125/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33126///
33127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
33128#[inline]
33129#[target_feature(enable = "avx512f,avx512vl")]
33130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33131#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33132#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33133pub const fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33134    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
33135}
33136
33137/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33138///
33139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
33140#[inline]
33141#[target_feature(enable = "avx512f,avx512vl")]
33142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33143#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33144#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33145pub const fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33146    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
33147}
33148
33149/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33150///
33151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
33152#[inline]
33153#[target_feature(enable = "avx512f,avx512vl")]
33154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33155#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33156#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33157pub const fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33158    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
33159}
33160
33161/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33162///
33163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
33164#[inline]
33165#[target_feature(enable = "avx512f,avx512vl")]
33166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33167#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33168#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33169pub const fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33170    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
33171}
33172
33173/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33174///
33175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
33176#[inline]
33177#[target_feature(enable = "avx512f")]
33178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33180#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33181pub const fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33182    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
33183}
33184
33185/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33186///
33187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
33188#[inline]
33189#[target_feature(enable = "avx512f")]
33190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33191#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33192#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33193pub const fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33194    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
33195}
33196
33197/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33198///
33199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
33200#[inline]
33201#[target_feature(enable = "avx512f,avx512vl")]
33202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33203#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33204#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33205pub const fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33206    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
33207}
33208
33209/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33210///
33211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
33212#[inline]
33213#[target_feature(enable = "avx512f,avx512vl")]
33214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33215#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33216#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33217pub const fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33218    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
33219}
33220
33221/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
33222///
33223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
33224#[inline]
33225#[target_feature(enable = "avx512f,avx512vl")]
33226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33227#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33228#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33229pub const fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33230    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
33231}
33232
33233/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33234///
33235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
33236#[inline]
33237#[target_feature(enable = "avx512f,avx512vl")]
33238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33239#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33240#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33241pub const fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33242    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
33243}
33244
33245/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33246///
33247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
33248#[inline]
33249#[target_feature(enable = "avx512f")]
33250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33251#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33252#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33253pub const fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
33254    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
33255}
33256
33257/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33258///
33259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
33260#[inline]
33261#[target_feature(enable = "avx512f")]
33262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33263#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33264#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33265pub const fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
33266    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
33267}
33268
33269/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33270///
33271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
33272#[inline]
33273#[target_feature(enable = "avx512f,avx512vl")]
33274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33275#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33276#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33277pub const fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
33278    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
33279}
33280
33281/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33282///
33283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
33284#[inline]
33285#[target_feature(enable = "avx512f,avx512vl")]
33286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33287#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33288#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33289pub const fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33290    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
33291}
33292
33293/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
33294///
33295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
33296#[inline]
33297#[target_feature(enable = "avx512f,avx512vl")]
33298#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33299#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33300#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33301pub const fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
33302    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
33303}
33304
33305/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33306///
33307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
33308#[inline]
33309#[target_feature(enable = "avx512f,avx512vl")]
33310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33311#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
33312#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33313pub const fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33314    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
33315}
33316
33317/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33318///
33319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
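///
/// # Examples
///
/// A minimal sketch (illustrative; assumes AVX512F, hence `ignore`) using the
/// not-less-than predicate, i.e. a signed greater-than-or-equal:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-1);
///     let b = _mm512_set1_epi32(0);
///     // Signed compare: -1 >= 0 is false in every lane.
///     let k = _mm512_cmp_epi32_mask::<_MM_CMPINT_NLT>(a, b);
///     assert_eq!(k, 0);
/// }
/// ```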
33320#[inline]
33321#[target_feature(enable = "avx512f")]
33322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33323#[rustc_legacy_const_generics(2)]
33324#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33325#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33326pub const fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33327    a: __m512i,
33328    b: __m512i,
33329) -> __mmask16 {
33330    unsafe {
33331        static_assert_uimm_bits!(IMM3, 3);
33332        let a = a.as_i32x16();
33333        let b = b.as_i32x16();
33334        let r = match IMM3 {
33335            0 => simd_eq(a, b),
33336            1 => simd_lt(a, b),
33337            2 => simd_le(a, b),
33338            3 => i32x16::ZERO,
33339            4 => simd_ne(a, b),
33340            5 => simd_ge(a, b),
33341            6 => simd_gt(a, b),
33342            _ => i32x16::splat(-1),
33343        };
33344        simd_bitmask(r)
33345    }
33346}
33347
33348/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33349///
33350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
33351#[inline]
33352#[target_feature(enable = "avx512f")]
33353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33354#[rustc_legacy_const_generics(3)]
33355#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33356#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33357pub const fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33358    k1: __mmask16,
33359    a: __m512i,
33360    b: __m512i,
33361) -> __mmask16 {
33362    unsafe {
33363        static_assert_uimm_bits!(IMM3, 3);
33364        let a = a.as_i32x16();
33365        let b = b.as_i32x16();
33366        let k1 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
33367        let r = match IMM3 {
33368            0 => simd_and(k1, simd_eq(a, b)),
33369            1 => simd_and(k1, simd_lt(a, b)),
33370            2 => simd_and(k1, simd_le(a, b)),
33371            3 => i32x16::ZERO,
33372            4 => simd_and(k1, simd_ne(a, b)),
33373            5 => simd_and(k1, simd_ge(a, b)),
33374            6 => simd_and(k1, simd_gt(a, b)),
33375            _ => k1,
33376        };
33377        simd_bitmask(r)
33378    }
33379}
33380
33381/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33382///
33383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
33384#[inline]
33385#[target_feature(enable = "avx512f,avx512vl")]
33386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33387#[rustc_legacy_const_generics(2)]
33388#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33389#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33390pub const fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33391    a: __m256i,
33392    b: __m256i,
33393) -> __mmask8 {
33394    unsafe {
33395        static_assert_uimm_bits!(IMM3, 3);
33396        let a = a.as_i32x8();
33397        let b = b.as_i32x8();
33398        let r = match IMM3 {
33399            0 => simd_eq(a, b),
33400            1 => simd_lt(a, b),
33401            2 => simd_le(a, b),
33402            3 => i32x8::ZERO,
33403            4 => simd_ne(a, b),
33404            5 => simd_ge(a, b),
33405            6 => simd_gt(a, b),
33406            _ => i32x8::splat(-1),
33407        };
33408        simd_bitmask(r)
33409    }
33410}
33411
33412/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33413///
33414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
33415#[inline]
33416#[target_feature(enable = "avx512f,avx512vl")]
33417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33418#[rustc_legacy_const_generics(3)]
33419#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33420#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33421pub const fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33422    k1: __mmask8,
33423    a: __m256i,
33424    b: __m256i,
33425) -> __mmask8 {
33426    unsafe {
33427        static_assert_uimm_bits!(IMM3, 3);
33428        let a = a.as_i32x8();
33429        let b = b.as_i32x8();
33430        let k1 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
33431        let r = match IMM3 {
33432            0 => simd_and(k1, simd_eq(a, b)),
33433            1 => simd_and(k1, simd_lt(a, b)),
33434            2 => simd_and(k1, simd_le(a, b)),
33435            3 => i32x8::ZERO,
33436            4 => simd_and(k1, simd_ne(a, b)),
33437            5 => simd_and(k1, simd_ge(a, b)),
33438            6 => simd_and(k1, simd_gt(a, b)),
33439            _ => k1,
33440        };
33441        simd_bitmask(r)
33442    }
33443}
33444
33445/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33446///
33447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
33448#[inline]
33449#[target_feature(enable = "avx512f,avx512vl")]
33450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33451#[rustc_legacy_const_generics(2)]
33452#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33453#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33454pub const fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33455    unsafe {
33456        static_assert_uimm_bits!(IMM3, 3);
33457        let a = a.as_i32x4();
33458        let b = b.as_i32x4();
33459        let r = match IMM3 {
33460            0 => simd_eq(a, b),
33461            1 => simd_lt(a, b),
33462            2 => simd_le(a, b),
33463            3 => i32x4::ZERO,
33464            4 => simd_ne(a, b),
33465            5 => simd_ge(a, b),
33466            6 => simd_gt(a, b),
33467            _ => i32x4::splat(-1),
33468        };
33469        simd_bitmask(r)
33470    }
33471}
33472
33473/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33474///
33475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
33476#[inline]
33477#[target_feature(enable = "avx512f,avx512vl")]
33478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33479#[rustc_legacy_const_generics(3)]
33480#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33481#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33482pub const fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
33483    k1: __mmask8,
33484    a: __m128i,
33485    b: __m128i,
33486) -> __mmask8 {
33487    unsafe {
33488        static_assert_uimm_bits!(IMM3, 3);
33489        let a = a.as_i32x4();
33490        let b = b.as_i32x4();
33491        let k1 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
33492        let r = match IMM3 {
33493            0 => simd_and(k1, simd_eq(a, b)),
33494            1 => simd_and(k1, simd_lt(a, b)),
33495            2 => simd_and(k1, simd_le(a, b)),
33496            3 => i32x4::ZERO,
33497            4 => simd_and(k1, simd_ne(a, b)),
33498            5 => simd_and(k1, simd_ge(a, b)),
33499            6 => simd_and(k1, simd_gt(a, b)),
33500            _ => k1,
33501        };
33502        simd_bitmask(r)
33503    }
33504}
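
// Illustrative sketch (not part of the source): predicates 3 (`_MM_CMPINT_FALSE`)
// and 7 (`_MM_CMPINT_TRUE`) ignore the inputs entirely, as the `match` above
// shows. Hypothetical test-only helper; assumes an AVX-512F/VL-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn _sketch_mm_cmp_epi32_mask_const_predicates() {
    let a = _mm_setr_epi32(1, 2, 3, 4);
    let b = _mm_setr_epi32(4, 3, 2, 1);
    assert_eq!(_mm_cmp_epi32_mask::<_MM_CMPINT_FALSE>(a, b), 0b0000);
    // A 128-bit vector has four 32-bit lanes, so only the low 4 mask bits are used.
    assert_eq!(_mm_cmp_epi32_mask::<_MM_CMPINT_TRUE>(a, b), 0b1111);
}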
33505
33506/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33507///
33508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
33509#[inline]
33510#[target_feature(enable = "avx512f")]
33511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33512#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33513#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33514pub const fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33515    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
33516}
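
// Illustrative sketch (not part of the source): the epu64 compares are unsigned,
// so an all-ones lane reads as u64::MAX rather than -1. Hypothetical test-only
// helper; assumes an AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_mm512_cmplt_epu64_mask() {
    let a = _mm512_set1_epi64(-1); // u64::MAX when viewed unsigned
    let b = _mm512_set1_epi64(0);
    // Unsigned: u64::MAX < 0 is false in every lane...
    assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0x00);
    // ...while 0 < u64::MAX holds in all eight lanes.
    assert_eq!(_mm512_cmplt_epu64_mask(b, a), 0xff);
}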
33517
33518/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33519///
33520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
33521#[inline]
33522#[target_feature(enable = "avx512f")]
33523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33524#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33525#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33526pub const fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
33527    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
33528}
33529
33530/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33531///
33532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
33533#[inline]
33534#[target_feature(enable = "avx512f,avx512vl")]
33535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33536#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33537#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33538pub const fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33539    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
33540}
33541
33542/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33543///
33544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
33545#[inline]
33546#[target_feature(enable = "avx512f,avx512vl")]
33547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33548#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33549#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33550pub const fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33551    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
33552}
33553
33554/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
33555///
33556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
33557#[inline]
33558#[target_feature(enable = "avx512f,avx512vl")]
33559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33561#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33562pub const fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33563    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
33564}
33565
33566/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33567///
33568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
33569#[inline]
33570#[target_feature(enable = "avx512f,avx512vl")]
33571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33572#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33573#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33574pub const fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33575    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
33576}
33577
33578/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33579///
33580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
33581#[inline]
33582#[target_feature(enable = "avx512f")]
33583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33584#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33585#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33586pub const fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33587    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
33588}
33589
33590/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33591///
33592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
33593#[inline]
33594#[target_feature(enable = "avx512f")]
33595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33596#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33597#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33598pub const fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
33599    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
33600}
33601
33602/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33603///
33604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
33605#[inline]
33606#[target_feature(enable = "avx512f,avx512vl")]
33607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33608#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33609#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33610pub const fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33611    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
33612}
33613
33614/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33615///
33616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
33617#[inline]
33618#[target_feature(enable = "avx512f,avx512vl")]
33619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33620#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33621#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33622pub const fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33623    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
33624}
33625
33626/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
33627///
33628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
33629#[inline]
33630#[target_feature(enable = "avx512f,avx512vl")]
33631#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33632#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33633#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33634pub const fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33635    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
33636}
33637
33638/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33639///
33640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
33641#[inline]
33642#[target_feature(enable = "avx512f,avx512vl")]
33643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33644#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33645#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33646pub const fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33647    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
33648}
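
// Illustrative sketch (not part of the source): greater-than is spelled
// `_MM_CMPINT_NLE` ("not less-or-equal") in the generic form, which is exactly
// what the masked wrappers above delegate to. Hypothetical test-only helper;
// assumes an AVX-512F/VL-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn _sketch_cmpgt_epu64_is_nle() {
    let a = _mm_set_epi64x(5, 1);
    let b = _mm_set_epi64x(2, 9);
    assert_eq!(
        _mm_cmpgt_epu64_mask(a, b),
        _mm_cmp_epu64_mask::<_MM_CMPINT_NLE>(a, b)
    );
}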
33649
33650/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33651///
33652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
33653#[inline]
33654#[target_feature(enable = "avx512f")]
33655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33656#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33657#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33658pub const fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33659    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
33660}
33661
33662/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33663///
33664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
33665#[inline]
33666#[target_feature(enable = "avx512f")]
33667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33668#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33669#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33670pub const fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
33671    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
33672}
33673
33674/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33675///
33676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
33677#[inline]
33678#[target_feature(enable = "avx512f,avx512vl")]
33679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33680#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33681#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33682pub const fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33683    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
33684}
33685
33686/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33687///
33688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
33689#[inline]
33690#[target_feature(enable = "avx512f,avx512vl")]
33691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33692#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33693#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33694pub const fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33695    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
33696}
33697
33698/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
33701#[inline]
33702#[target_feature(enable = "avx512f,avx512vl")]
33703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33704#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33705#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33706pub const fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33707    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
33708}
33709
33710/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33711///
33712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
33713#[inline]
33714#[target_feature(enable = "avx512f,avx512vl")]
33715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33716#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33717#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33718pub const fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33719    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
33720}
33721
33722/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33723///
33724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
33725#[inline]
33726#[target_feature(enable = "avx512f")]
33727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33728#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33729#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33730pub const fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33731    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
33732}
33733
33734/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33735///
33736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
33737#[inline]
33738#[target_feature(enable = "avx512f")]
33739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33740#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33742pub const fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
33743    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
33744}
33745
33746/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33747///
33748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
33749#[inline]
33750#[target_feature(enable = "avx512f,avx512vl")]
33751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33752#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33753#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33754pub const fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33755    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
33756}
33757
33758/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33759///
33760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
33761#[inline]
33762#[target_feature(enable = "avx512f,avx512vl")]
33763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33764#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33765#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33766pub const fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33767    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
33768}
33769
33770/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
33771///
33772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
33773#[inline]
33774#[target_feature(enable = "avx512f,avx512vl")]
33775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33776#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33777#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33778pub const fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33779    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
33780}
33781
33782/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33783///
33784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
33785#[inline]
33786#[target_feature(enable = "avx512f,avx512vl")]
33787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33788#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33789#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33790pub const fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33791    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
33792}
33793
33794/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
33795///
33796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
33797#[inline]
33798#[target_feature(enable = "avx512f")]
33799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33800#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33801#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33802pub const fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33803    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
33804}
33805
33806/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33807///
33808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
33809#[inline]
33810#[target_feature(enable = "avx512f")]
33811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33812#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33813#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33814pub const fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
33815    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
33816}
33817
33818/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
33819///
33820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
33821#[inline]
33822#[target_feature(enable = "avx512f,avx512vl")]
33823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33824#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33825#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33826pub const fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33827    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
33828}
33829
33830/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33831///
33832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
33833#[inline]
33834#[target_feature(enable = "avx512f,avx512vl")]
33835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33836#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33837#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33838pub const fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33839    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
33840}
33841
33842/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
33843///
33844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
33845#[inline]
33846#[target_feature(enable = "avx512f,avx512vl")]
33847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33848#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33849#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33850pub const fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33851    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
33852}
33853
33854/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33855///
33856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
33857#[inline]
33858#[target_feature(enable = "avx512f,avx512vl")]
33859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33860#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33861#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33862pub const fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33863    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
33864}
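
// Illustrative sketch (not part of the source): equality is a pure bit-pattern
// test, so the unsigned (epu64) and signed (epi64) cmpeq variants always agree.
// Hypothetical test-only helper; assumes an AVX-512F/VL-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn _sketch_cmpeq_is_sign_agnostic() {
    let a = _mm_set_epi64x(-1, 7);
    let b = _mm_set_epi64x(-1, 8);
    assert_eq!(_mm_cmpeq_epu64_mask(a, b), _mm_cmpeq_epi64_mask(a, b));
    // Only the upper lane (-1) matches, so only bit 1 is set.
    assert_eq!(_mm_cmpeq_epu64_mask(a, b), 0b10);
}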
33865
33866/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
33867///
33868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
33869#[inline]
33870#[target_feature(enable = "avx512f")]
33871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33872#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33873#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33874pub const fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
33875    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
33876}
33877
33878/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33879///
33880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
33881#[inline]
33882#[target_feature(enable = "avx512f")]
33883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33884#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33885#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33886pub const fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
33887    _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
33888}
33889
33890/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
33891///
33892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
33893#[inline]
33894#[target_feature(enable = "avx512f,avx512vl")]
33895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33896#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33897#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33898pub const fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
33899    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
33900}
33901
33902/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33903///
33904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
33905#[inline]
33906#[target_feature(enable = "avx512f,avx512vl")]
33907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33908#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33909#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33910pub const fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
33911    _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
33912}
33913
33914/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
33915///
33916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
33917#[inline]
33918#[target_feature(enable = "avx512f,avx512vl")]
33919#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33920#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33921#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33922pub const fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
33923    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
33924}
33925
33926/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33927///
33928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
33929#[inline]
33930#[target_feature(enable = "avx512f,avx512vl")]
33931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33932#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
33933#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33934pub const fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33935    _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
33936}
33937
33938/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33939///
33940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
33941#[inline]
33942#[target_feature(enable = "avx512f")]
33943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33944#[rustc_legacy_const_generics(2)]
33945#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33946#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33947pub const fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
33948    a: __m512i,
33949    b: __m512i,
33950) -> __mmask8 {
33951    unsafe {
33952        static_assert_uimm_bits!(IMM3, 3);
33953        let a = a.as_u64x8();
33954        let b = b.as_u64x8();
33955        let r = match IMM3 {
33956            0 => simd_eq(a, b),
33957            1 => simd_lt(a, b),
33958            2 => simd_le(a, b),
33959            3 => i64x8::ZERO,
33960            4 => simd_ne(a, b),
33961            5 => simd_ge(a, b),
33962            6 => simd_gt(a, b),
33963            _ => i64x8::splat(-1),
33964        };
33965        simd_bitmask(r)
33966    }
33967}
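
// Illustrative sketch (not part of the source): the three IMM3 bits select one of
// the eight predicates 0..=7 (EQ, LT, LE, FALSE, NE, NLT, NLE, TRUE) handled by
// the `match` above. Hypothetical test-only helper; assumes an AVX-512F-capable
// CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_mm512_cmp_epu64_mask() {
    let a = _mm512_set1_epi64(4);
    let b = _mm512_set1_epi64(4);
    assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_LE>(a, b), 0xff);
    assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_NE>(a, b), 0x00);
}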
33968
33969/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33970///
33971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
33972#[inline]
33973#[target_feature(enable = "avx512f")]
33974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33975#[rustc_legacy_const_generics(3)]
33976#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33977#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
33978pub const fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
33979    k1: __mmask8,
33980    a: __m512i,
33981    b: __m512i,
33982) -> __mmask8 {
33983    unsafe {
33984        static_assert_uimm_bits!(IMM3, 3);
33985        let a = a.as_u64x8();
33986        let b = b.as_u64x8();
33987        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
33988        let r = match IMM3 {
33989            0 => simd_and(k1, simd_eq(a, b)),
33990            1 => simd_and(k1, simd_lt(a, b)),
33991            2 => simd_and(k1, simd_le(a, b)),
33992            3 => i64x8::ZERO,
33993            4 => simd_and(k1, simd_ne(a, b)),
33994            5 => simd_and(k1, simd_ge(a, b)),
33995            6 => simd_and(k1, simd_gt(a, b)),
33996            _ => k1,
33997        };
33998        simd_bitmask(r)
33999    }
34000}
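
// Illustrative sketch (not part of the source): for every predicate the masked
// form equals `k1 &` the unmasked result (for FALSE both sides are zero, and for
// TRUE `k1 & 0xff == k1`). Hypothetical test-only helper; assumes an
// AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_mm512_mask_cmp_epu64_mask() {
    let a = _mm512_set1_epi64(1);
    let b = _mm512_set1_epi64(2);
    let k1: __mmask8 = 0b0011_1100;
    let masked = _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b);
    let plain = _mm512_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b);
    assert_eq!(masked, k1 & plain);
}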
34001
34002/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34003///
34004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
34005#[inline]
34006#[target_feature(enable = "avx512f,avx512vl")]
34007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34008#[rustc_legacy_const_generics(2)]
34009#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34010#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34011pub const fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
34012    a: __m256i,
34013    b: __m256i,
34014) -> __mmask8 {
34015    unsafe {
34016        static_assert_uimm_bits!(IMM3, 3);
34017        let a = a.as_u64x4();
34018        let b = b.as_u64x4();
34019        let r = match IMM3 {
34020            0 => simd_eq(a, b),
34021            1 => simd_lt(a, b),
34022            2 => simd_le(a, b),
34023            3 => i64x4::ZERO,
34024            4 => simd_ne(a, b),
34025            5 => simd_ge(a, b),
34026            6 => simd_gt(a, b),
34027            _ => i64x4::splat(-1),
34028        };
34029        simd_bitmask(r)
34030    }
34031}
34032
34033/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34034///
34035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
34036#[inline]
34037#[target_feature(enable = "avx512f,avx512vl")]
34038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34039#[rustc_legacy_const_generics(3)]
34040#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34041#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34042pub const fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
34043    k1: __mmask8,
34044    a: __m256i,
34045    b: __m256i,
34046) -> __mmask8 {
34047    unsafe {
34048        static_assert_uimm_bits!(IMM3, 3);
34049        let a = a.as_u64x4();
34050        let b = b.as_u64x4();
34051        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
34052        let r = match IMM3 {
34053            0 => simd_and(k1, simd_eq(a, b)),
34054            1 => simd_and(k1, simd_lt(a, b)),
34055            2 => simd_and(k1, simd_le(a, b)),
34056            3 => i64x4::ZERO,
34057            4 => simd_and(k1, simd_ne(a, b)),
34058            5 => simd_and(k1, simd_ge(a, b)),
34059            6 => simd_and(k1, simd_gt(a, b)),
34060            _ => k1,
34061        };
34062        simd_bitmask(r)
34063    }
34064}
34065
34066/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34067///
34068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
34069#[inline]
34070#[target_feature(enable = "avx512f,avx512vl")]
34071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34072#[rustc_legacy_const_generics(2)]
34073#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34074#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34075pub const fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
34076    unsafe {
34077        static_assert_uimm_bits!(IMM3, 3);
34078        let a = a.as_u64x2();
34079        let b = b.as_u64x2();
34080        let r = match IMM3 {
34081            0 => simd_eq(a, b),
34082            1 => simd_lt(a, b),
34083            2 => simd_le(a, b),
34084            3 => i64x2::ZERO,
34085            4 => simd_ne(a, b),
34086            5 => simd_ge(a, b),
34087            6 => simd_gt(a, b),
34088            _ => i64x2::splat(-1),
34089        };
34090        simd_bitmask(r)
34091    }
34092}
34093
34094/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34095///
34096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
34097#[inline]
34098#[target_feature(enable = "avx512f,avx512vl")]
34099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34100#[rustc_legacy_const_generics(3)]
34101#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
34102#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34103pub const fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
34104    k1: __mmask8,
34105    a: __m128i,
34106    b: __m128i,
34107) -> __mmask8 {
34108    unsafe {
34109        static_assert_uimm_bits!(IMM3, 3);
34110        let a = a.as_u64x2();
34111        let b = b.as_u64x2();
34112        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
34113        let r = match IMM3 {
34114            0 => simd_and(k1, simd_eq(a, b)),
34115            1 => simd_and(k1, simd_lt(a, b)),
34116            2 => simd_and(k1, simd_le(a, b)),
34117            3 => i64x2::ZERO,
34118            4 => simd_and(k1, simd_ne(a, b)),
34119            5 => simd_and(k1, simd_ge(a, b)),
34120            6 => simd_and(k1, simd_gt(a, b)),
34121            _ => k1,
34122        };
34123        simd_bitmask(r)
34124    }
34125}
34126
34127/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
34128///
34129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
34130#[inline]
34131#[target_feature(enable = "avx512f")]
34132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34133#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34134#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34135pub const fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34136    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
34137}
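
// Illustrative sketch (not part of the source): the same bit patterns order
// differently under signed (epi64) and unsigned (epu64) less-than. Hypothetical
// test-only helper; assumes an AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_signed_vs_unsigned_lt() {
    let a = _mm512_set1_epi64(-1);
    let b = _mm512_set1_epi64(0);
    assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0xff); // signed: -1 < 0
    assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0x00); // unsigned: u64::MAX < 0 is false
}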
34138
34139/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34140///
34141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
34142#[inline]
34143#[target_feature(enable = "avx512f")]
34144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34145#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34146#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34147pub const fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34148    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
34149}
34150
34151/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
34152///
34153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
34154#[inline]
34155#[target_feature(enable = "avx512f,avx512vl")]
34156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34157#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34158#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34159pub const fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34160    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
34161}
34162
34163/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34164///
34165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
34166#[inline]
34167#[target_feature(enable = "avx512f,avx512vl")]
34168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34169#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34170#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34171pub const fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34172    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
34173}
34174
34175/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
34176///
34177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
34178#[inline]
34179#[target_feature(enable = "avx512f,avx512vl")]
34180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34181#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34182#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34183pub const fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34184    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
34185}
34186
34187/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34188///
34189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
34190#[inline]
34191#[target_feature(enable = "avx512f,avx512vl")]
34192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34193#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34194#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34195pub const fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34196    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
34197}
34198
34199/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
34200///
34201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
34202#[inline]
34203#[target_feature(enable = "avx512f")]
34204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34205#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34206#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34207pub const fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34208    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
34209}
34210
34211/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34212///
34213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
34214#[inline]
34215#[target_feature(enable = "avx512f")]
34216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34217#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34218#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34219pub const fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34220    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
34221}
34222
34223/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
34224///
34225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
34226#[inline]
34227#[target_feature(enable = "avx512f,avx512vl")]
34228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34229#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34230#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34231pub const fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34232    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
34233}
34234
34235/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34236///
34237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
34238#[inline]
34239#[target_feature(enable = "avx512f,avx512vl")]
34240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34241#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34242#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34243pub const fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34244    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
34245}
34246
34247/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
34248///
34249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
34250#[inline]
34251#[target_feature(enable = "avx512f,avx512vl")]
34252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34253#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34254#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34255pub const fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34256    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
34257}
34258
34259/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34260///
34261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
34262#[inline]
34263#[target_feature(enable = "avx512f,avx512vl")]
34264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34265#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34266#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34267pub const fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34268    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
34269}
34270
34271/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
34272///
34273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
34274#[inline]
34275#[target_feature(enable = "avx512f")]
34276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34277#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34278#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34279pub const fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34280    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
34281}
34282
34283/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34284///
34285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
34286#[inline]
34287#[target_feature(enable = "avx512f")]
34288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34289#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34290#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34291pub const fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34292    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
34293}
34294
34295/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
34296///
34297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
34298#[inline]
34299#[target_feature(enable = "avx512f,avx512vl")]
34300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34301#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34302#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34303pub const fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34304    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
34305}
34306
34307/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34308///
34309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
34310#[inline]
34311#[target_feature(enable = "avx512f,avx512vl")]
34312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34313#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34314#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34315pub const fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34316    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
34317}
34318
34319/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
34320///
34321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
34322#[inline]
34323#[target_feature(enable = "avx512f,avx512vl")]
34324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34325#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34326#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34327pub const fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34328    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
34329}
34330
34331/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34332///
34333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
34334#[inline]
34335#[target_feature(enable = "avx512f,avx512vl")]
34336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34337#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34338#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34339pub const fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34340    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
34341}
34342
34343/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
34344///
34345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
34346#[inline]
34347#[target_feature(enable = "avx512f")]
34348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34349#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34350#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34351pub const fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34352    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
34353}
34354
34355/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34356///
34357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
34358#[inline]
34359#[target_feature(enable = "avx512f")]
34360#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34361#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34362#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34363pub const fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34364    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
34365}
34366
34367/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
34368///
34369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
34370#[inline]
34371#[target_feature(enable = "avx512f,avx512vl")]
34372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34373#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34374#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34375pub const fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34376    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
34377}
34378
34379/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34380///
34381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
34382#[inline]
34383#[target_feature(enable = "avx512f,avx512vl")]
34384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34385#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34386#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34387pub const fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34388    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
34389}
34390
34391/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
34392///
34393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
34394#[inline]
34395#[target_feature(enable = "avx512f,avx512vl")]
34396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34397#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34398#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34399pub const fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34400    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
34401}
34402
34403/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34404///
34405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
34406#[inline]
34407#[target_feature(enable = "avx512f,avx512vl")]
34408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34409#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34410#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34411pub const fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34412    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
34413}
34414
34415/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
34416///
34417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
34418#[inline]
34419#[target_feature(enable = "avx512f")]
34420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34421#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34422#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34423pub const fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34424    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
34425}
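
// Illustrative sketch (not part of the source): one mask bit per 64-bit lane, so
// the eight lanes of a __m512i fill the whole __mmask8. Hypothetical test-only
// helper; assumes an AVX-512F-capable CPU.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn _sketch_mm512_cmpeq_epi64_mask() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(0, 9, 2, 9, 4, 9, 6, 9);
    // Lanes 0, 2, 4 and 6 match.
    assert_eq!(_mm512_cmpeq_epi64_mask(a, b), 0b0101_0101);
}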
34426
34427/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34428///
34429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
34430#[inline]
34431#[target_feature(enable = "avx512f")]
34432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34433#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34434#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34435pub const fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34436    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
34437}
34438
34439/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
34440///
34441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
34442#[inline]
34443#[target_feature(enable = "avx512f,avx512vl")]
34444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34445#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34446#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34447pub const fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34448    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
34449}
34450
34451/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34452///
34453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
34454#[inline]
34455#[target_feature(enable = "avx512f,avx512vl")]
34456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34457#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34458#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34459pub const fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34460    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
34461}
34462
34463/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
34464///
34465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
34466#[inline]
34467#[target_feature(enable = "avx512f,avx512vl")]
34468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34469#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34470#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34471pub const fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34472    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
34473}
34474
34475/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34476///
34477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
34478#[inline]
34479#[target_feature(enable = "avx512f,avx512vl")]
34480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34481#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34482#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34483pub const fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34484    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
34485}
34486
34487/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
34488///
34489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
34490#[inline]
34491#[target_feature(enable = "avx512f")]
34492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34493#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34494#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34495pub const fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
34496    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
34497}
34498
34499/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34500///
34501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
34502#[inline]
34503#[target_feature(enable = "avx512f")]
34504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34505#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34506#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34507pub const fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
34508    _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
34509}
34510
34511/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
34512///
34513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
34514#[inline]
34515#[target_feature(enable = "avx512f,avx512vl")]
34516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34517#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34518#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34519pub const fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
34520    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
34521}
34522
34523/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34524///
34525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
34526#[inline]
34527#[target_feature(enable = "avx512f,avx512vl")]
34528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34529#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34530#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34531pub const fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
34532    _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
34533}
34534
34535/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
34536///
34537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
34538#[inline]
34539#[target_feature(enable = "avx512f,avx512vl")]
34540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34541#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34542#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34543pub const fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
34544    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
34545}
34546
34547/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
34548///
34549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
34550#[inline]
34551#[target_feature(enable = "avx512f,avx512vl")]
34552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34553#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
34554#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
34555pub const fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
34556    _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
34557}
34558
34559/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
34560///
34561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
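///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(1);
///     let b = _mm512_set1_epi64(2);
///     // _MM_CMPINT_LT selects the signed less-than predicate
///     assert_eq!(_mm512_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b), 0b1111_1111);
/// }
/// ```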
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x8::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
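///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(0);
///     let b = _mm512_set1_epi64(0);
///     // the comparison is true in every lane, but only bits set in `k1` survive
///     let k = _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(0b0101_0101, a, b);
///     assert_eq!(k, 0b0101_0101);
/// }
/// ```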
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m512i,
    b: __m512i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x8();
        let b = b.as_i64x8();
        let k1 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x4::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x4();
        let b = b.as_i64x4();
        let k1 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let r = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x2::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a = a.as_i64x2();
        let b = b.as_i64x2();
        let k1 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
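///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(3);
///     // sixteen lanes of 3 sum to 48
///     assert_eq!(_mm512_reduce_add_epi32(a), 48);
/// }
/// ```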
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_add_ordered(a.as_i32x16(), 0) }
}

/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
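///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(3);
///     // only the low eight lanes are active, so the sum is 8 * 3
///     assert_eq!(_mm512_mask_reduce_add_epi32(0b0000_0000_1111_1111, a), 24);
/// }
/// ```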
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO), 0) }
}

/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_add_ordered(a.as_i64x8(), 0) }
}

/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO), 0) }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
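///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_ps(0.5);
///     // sixteen lanes of 0.5 sum exactly to 8.0
///     assert_eq!(_mm512_reduce_add_ps(a), 8.0);
/// }
/// ```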
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_ps(a: __m512) -> f32 {
    unsafe {
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a = _mm256_add_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract!(a, 0, f32) + simd_extract!(a, 1, f32)
    }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
    unsafe {
        let a = _mm256_add_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract!(a, 0, f64) + simd_extract!(a, 1, f64)
    }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
}

/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
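///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(2);
///     // 2 raised to the 16th power is 65536
///     assert_eq!(_mm512_reduce_mul_epi32(a), 65536);
/// }
/// ```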
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_mul_ordered(a.as_i32x16(), 1) }
}

/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_mul_ordered(
            simd_select_bitmask(k, a.as_i32x16(), _mm512_set1_epi32(1).as_i32x16()),
            1,
        )
    }
}

/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_mul_ordered(a.as_i64x8(), 1) }
}

/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe {
        simd_reduce_mul_ordered(
            simd_select_bitmask(k, a.as_i64x8(), _mm512_set1_epi64(1).as_i64x8()),
            1,
        )
    }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
    unsafe {
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a = _mm256_mul_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract!(a, 0, f32) * simd_extract!(a, 1, f32)
    }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
    unsafe {
        let a = _mm256_mul_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract!(a, 0, f64) * simd_extract!(a, 1, f64)
    }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
}

/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_max(a.as_i32x16()) }
}

/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
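///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
///     // the high eight lanes are masked off, so the maximum over lanes 0..=7 is 7
///     assert_eq!(_mm512_mask_reduce_max_epi32(0b0000_0000_1111_1111, a), 7);
/// }
/// ```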
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_max(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MIN),
        ))
    }
}

/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_max(a.as_i64x8()) }
}

/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
}

/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
    unsafe { simd_reduce_max(a.as_u32x16()) }
}

/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
}

/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
    unsafe { simd_reduce_max(a.as_u64x8()) }
}

/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
    unsafe {
        let a = _mm256_max_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
    }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
    unsafe {
        let a = _mm256_max_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
}

/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_min(a.as_i32x16()) }
}

/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MAX),
        ))
    }
}

/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_min(a.as_i64x8()) }
}

/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
}

/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
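///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // an all-ones bit pattern reads as u32::MAX in every unsigned lane
///     let a = _mm512_set1_epi32(-1);
///     assert_eq!(_mm512_reduce_min_epu32(a), u32::MAX);
/// }
/// ```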
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
    unsafe { simd_reduce_min(a.as_u32x16()) }
}

/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_u32x16(),
            u32x16::splat(u32::MAX),
        ))
    }
}

/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
    unsafe { simd_reduce_min(a.as_u64x8()) }
}

/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4593)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
    unsafe {
        let a = _mm256_min_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
    }
}

/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
    unsafe {
        let a = _mm256_min_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}

/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
}

/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
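///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set_epi32(3, 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6);
///     // 3 & 6 == 2
///     assert_eq!(_mm512_reduce_and_epi32(a), 2);
/// }
/// ```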
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_and(a.as_i32x16()) }
}

/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
}

/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_and(a.as_i64x8()) }
}

/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4565)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
}

/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
    unsafe { simd_reduce_or(a.as_i32x16()) }
}

/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
}

/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
    unsafe { simd_reduce_or(a.as_i64x8()) }
}

/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
}

/// Returns vector of type `__m512d` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_undefined_pd() -> __m512d {
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_undefined_ps() -> __m512 {
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512i` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_undefined_epi32() -> __m512i {
    unsafe { const { mem::zeroed() } }
}

/// Returns vector of type `__m512` with indeterminate elements.
/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
/// In practice, this is typically equivalent to [`mem::zeroed`].
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
// This intrinsic has no corresponding instruction.
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_undefined() -> __m512 {
    unsafe { const { mem::zeroed() } }
}

/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
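///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available and that
/// `mem_addr` is valid for an unaligned 64-byte read:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let data: [i32; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
///     // no alignment requirement on the source pointer
///     let v = _mm512_loadu_epi32(data.as_ptr());
///     assert_eq!(_mm512_reduce_add_epi32(v), 120);
/// }
/// ```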
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] // should be vmovdqu32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
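///
/// # Examples
///
/// A minimal usage sketch (illustrative, not taken from the original
/// documentation); it assumes AVX-512F is known to be available:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(-1);
///     let mut out = [0i16; 16];
///     // only the two lanes selected by the mask are written; the rest of
///     // `out` keeps its previous contents
///     _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr(), 0b0000_0000_0000_0011, a);
///     assert_eq!(out[..2], [-1, -1]);
///     assert_eq!(out[2..], [0i16; 14]);
/// }
/// ```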
35457#[inline]
35458#[target_feature(enable = "avx512f")]
35459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35460#[cfg_attr(test, assert_instr(vpmovdw))]
35461pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
35462    vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k);
35463}
35464
35465/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35466///
35467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
35468#[inline]
35469#[target_feature(enable = "avx512f,avx512vl")]
35470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35471#[cfg_attr(test, assert_instr(vpmovdw))]
35472pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
35473    vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k);
35474}
35475
35476/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
35477///
35478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
35479#[inline]
35480#[target_feature(enable = "avx512f,avx512vl")]
35481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35482#[cfg_attr(test, assert_instr(vpmovdw))]
35483pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
35484    vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k);
35485}
35486
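// Hedged sketch (hypothetical helper, not part of the API): the masked
// truncating store writes each selected lane to its own element slot in
// memory; lanes whose mask bit is clear leave the corresponding destination
// bytes untouched.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn store_even_lanes_as_i16(dst: &mut [i16; 16], a: __m512i) {
    // Keep lanes 0, 2, 4, ...: only the even-indexed i16 slots are written.
    _mm512_mask_cvtepi32_storeu_epi16(dst.as_mut_ptr(), 0b0101_0101_0101_0101, a);
}
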
/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k);
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k);
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k);
}

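// Hedged illustration (hypothetical helper) of how the three 32->16 store
// flavours above differ for the lane value 0x0001_8000 (98304): truncation
// keeps the low half (0x8000, i.e. -32768 as i16), signed saturation clamps
// to i16::MAX (32767), and unsigned saturation clamps to u16::MAX (0xFFFF).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn demo_32_to_16_flavours(out: &mut [[i16; 16]; 3]) {
    let a = _mm512_set1_epi32(0x0001_8000);
    _mm512_mask_cvtepi32_storeu_epi16(out[0].as_mut_ptr(), 0xFFFF, a); // truncate
    _mm512_mask_cvtsepi32_storeu_epi16(out[1].as_mut_ptr(), 0xFFFF, a); // signed sat
    _mm512_mask_cvtusepi32_storeu_epi16(out[2].as_mut_ptr(), 0xFFFF, a); // unsigned sat
}
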
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdbmem(mem_addr, a.as_i32x16(), k);
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
}

/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
}

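// Hedged sketch (hypothetical helper): packing sixteen 32-bit lanes into
// sixteen bytes with unsigned saturation, a common step when narrowing to
// pixel-sized data. With all mask bits set, exactly the low 16 bytes at
// `dst` are written.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn pack_u32_to_u8(dst: &mut [i8; 16], a: __m512i) {
    _mm512_mask_cvtusepi32_storeu_epi8(dst.as_mut_ptr(), 0xFFFF, a);
}
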
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), k);
}

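// Hedged sketch (hypothetical helper): a 512-bit vector holds only eight
// 64-bit lanes, so the 64->16 stores write at most 16 bytes. Here the low
// four lanes are narrowed with signed saturation, touching exactly the
// eight bytes of `dst`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn store_low4_i64_as_i16(dst: &mut [i16; 4], a: __m512i) {
    _mm512_mask_cvtsepi64_storeu_epi16(dst.as_mut_ptr(), 0b0000_1111, a);
}
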
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
}

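// Hedged sketch (hypothetical helper): masked-off elements of these stores
// are simply not written, so with k = 0b0000_0101 only lanes 0 and 2 are
// narrowed, changing exactly the bytes at indices 0 and 2 of `dst`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn store_lanes_0_and_2(dst: &mut [i8; 8], a: __m512i) {
    _mm512_mask_cvtepi64_storeu_epi8(dst.as_mut_ptr(), 0b0000_0101, a);
}
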
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

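// Hedged sketch (hypothetical helper): narrowing eight 64-bit lanes to
// 32 bits with truncation and storing all of them (mask 0xFF) fills exactly
// 32 bytes at `dst`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn narrow_i64_to_i32(dst: &mut [i32; 8], a: __m512i) {
    _mm512_mask_cvtepi64_storeu_epi32(dst.as_mut_ptr(), 0xFF, a);
}
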
/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

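// Hedged round-trip sketch (hypothetical helper): storing a vector through
// the unaligned store and reloading it yields the original lanes, since
// both sides are plain unaligned 64-byte memory accesses.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn roundtrip_epi32(a: __m512i) -> __m512i {
    let mut buf = [0i32; 16];
    _mm512_storeu_epi32(buf.as_mut_ptr(), a);
    _mm512_loadu_epi32(buf.as_ptr())
}
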
/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
    ptr::read_unaligned(mem_addr)
}

/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write_unaligned(mem_addr, a);
}

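// Hedged sketch (hypothetical helper): unlike the `_epi32`/`_epi64`
// variants, the `si512` pair is typed on `__m512i` pointers, so a byte
// buffer has to be cast explicitly before loading or storing.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn copy_64_bytes(src: &[u8; 64], dst: &mut [u8; 64]) {
    let v = _mm512_loadu_si512(src.as_ptr() as *const __m512i);
    _mm512_storeu_si512(dst.as_mut_ptr() as *mut __m512i, v);
}
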
/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
    ptr::read_unaligned(mem_addr as *const __m512d)
}

/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
}

/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
    ptr::read_unaligned(mem_addr as *const __m512)
}

/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write_unaligned(mem_addr as *mut __m512, a);
}

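// Hedged sketch (hypothetical helper): the `ps`/`pd` unaligned pairs mirror
// the integer ones but are typed on `f32`/`f64`. Here 16 floats are loaded,
// doubled, and stored back in place.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn double_in_place(data: &mut [f32; 16]) {
    let v = _mm512_loadu_ps(data.as_ptr());
    _mm512_storeu_ps(data.as_mut_ptr(), _mm512_add_ps(v, v));
}
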
/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
    ptr::read(mem_addr)
}

/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write(mem_addr, a);
}

/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
}

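// Hedged sketch of meeting the alignment contract of the aligned forms: a
// `#[repr(align(64))]` wrapper guarantees the pointer handed to the aligned
// load/store satisfies the 64-byte requirement. `Aligned64` and the helper
// are illustrative, not part of this module.
#[cfg(test)]
#[allow(dead_code)]
#[repr(align(64))]
struct Aligned64([i32; 16]);

#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn aligned_roundtrip(buf: &mut Aligned64, a: __m512i) -> __m512i {
    _mm512_store_epi32(buf.0.as_mut_ptr(), a);
    _mm512_load_epi32(buf.0.as_ptr())
}
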
/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
}

/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
    ptr::read(mem_addr as *const __m512)
}

/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write(mem_addr as *mut __m512, a);
}

/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovapd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
    ptr::read(mem_addr as *const __m512d)
}

/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovapd
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write(mem_addr as *mut __m512d, a);
}

36407/// Load packed 32-bit integers from memory into dst using writemask k
36408/// (elements are copied from src when the corresponding mask bit is not set).
36409/// mem_addr does not need to be aligned on any particular boundary.
36410///
36411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
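///
/// # Examples
///
/// A minimal sketch with illustrative values (marked `ignore`; requires
/// AVX-512F). The low eight mask bits select lanes loaded from memory; the
/// rest keep `src`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let buf: [i32; 16] = core::array::from_fn(|i| i as i32);
/// unsafe {
///     let src = _mm512_set1_epi32(-1);
///     // Lanes 0..8 come from `buf`; lanes 8..16 stay at -1.
///     let v = _mm512_mask_loadu_epi32(src, 0x00FF, buf.as_ptr());
/// }
/// ```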
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi32(
    src: __m512i,
    k: __mmask16,
    mem_addr: *const i32,
) -> __m512i {
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x16()).as_m512i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
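///
/// # Examples
///
/// A minimal sketch of the common tail-handling pattern (marked `ignore`;
/// requires AVX-512F). Only the lanes selected by k are read, so no bytes
/// past the end of the short slice are touched:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let tail = [7i32, 8, 9];
/// let k: __mmask16 = (1u16 << tail.len()) - 1; // 0b0000_0000_0000_0111
/// // Lanes 0..3 are loaded from `tail`; lanes 3..16 are zeroed.
/// let v = unsafe { _mm512_maskz_loadu_epi32(k, tail.as_ptr()) };
/// ```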
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_epi64(
    src: __m512i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m512i {
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x8()).as_m512i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
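///
/// # Examples
///
/// A minimal sketch with illustrative values (marked `ignore`; requires
/// AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let buf = [1.0f32; 16];
/// unsafe {
///     let src = _mm512_setzero_ps();
///     // Even lanes are loaded from `buf`; odd lanes stay 0.0.
///     let v = _mm512_mask_loadu_ps(src, 0x5555, buf.as_ptr());
/// }
/// ```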
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_ps(
    src: __m512,
    k: __mmask16,
    mem_addr: *const f32,
) -> __m512 {
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x16()).as_m512()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_loadu_pd(
    src: __m512d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m512d {
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x8()).as_m512d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
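///
/// # Examples
///
/// A minimal sketch (marked `ignore`; requires AVX-512F and AVX-512VL). The
/// 256-bit form takes an 8-bit mask, one bit per 32-bit lane:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let buf = [10i32, 11, 12, 13, 14, 15, 16, 17];
/// unsafe {
///     let src = _mm256_set1_epi32(0);
///     // Lanes 0..4 come from `buf`; lanes 4..8 keep 0.
///     let v = _mm256_mask_loadu_epi32(src, 0x0F, buf.as_ptr());
/// }
/// ```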
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi32(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m256i {
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x8()).as_m256i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_epi64(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m256i {
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x4()).as_m256i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x8()).as_m256()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_loadu_pd(
    src: __m256d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m256d {
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x4()).as_m256d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
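///
/// # Examples
///
/// A minimal sketch (marked `ignore`; requires AVX-512F and AVX-512VL). Only
/// the low four bits of the `__mmask8` are used for a 128-bit vector of
/// 32-bit lanes:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let buf = [1i32, 2, 3, 4];
/// unsafe {
///     let src = _mm_set1_epi32(-1);
///     // Lanes 0 and 1 come from `buf`; lanes 2 and 3 keep -1.
///     let v = _mm_mask_loadu_epi32(src, 0b0011, buf.as_ptr());
/// }
/// ```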
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi32(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m128i {
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x4()).as_m128i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_epi64(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m128i {
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x2()).as_m128i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x4()).as_m128()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x2()).as_m128d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
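///
/// # Examples
///
/// A minimal sketch (marked `ignore`; requires AVX-512F). Unlike the `loadu`
/// form, the pointer must be 64-byte aligned; the hypothetical `Aligned`
/// wrapper guarantees that here:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// let data = Aligned([5; 16]);
/// unsafe {
///     let src = _mm512_set1_epi32(0);
///     // Upper eight lanes come from `data`; lower eight keep 0.
///     let v = _mm512_mask_load_epi32(src, 0xFF00, data.0.as_ptr());
/// }
/// ```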
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_load_epi32(
    src: __m512i,
    k: __mmask16,
    mem_addr: *const i32,
) -> __m512i {
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x16()).as_m512i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_load_epi64(
    src: __m512i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m512i {
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x8()).as_m512i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x16()).as_m512()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_load_pd(
    src: __m512d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m512d {
    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x8()).as_m512d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_load_epi32(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m256i {
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x8()).as_m256i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_load_epi64(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m256i {
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x4()).as_m256i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x8()).as_m256()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_load_pd(
    src: __m256d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m256d {
    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x4()).as_m256d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_load_epi32(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m128i {
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x4()).as_m128i()
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_load_epi64(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m128i {
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x2()).as_m128i()
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x4()).as_m128()
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x2()).as_m128d()
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
}

/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
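///
/// # Examples
///
/// A minimal sketch (marked `ignore`; requires AVX-512F). The hypothetical
/// `Aligned` wrapper satisfies the documented 16-byte alignment requirement
/// on mem_addr:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// let x = Aligned(10.0);
/// unsafe {
///     let src = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
///     // Mask bit 0 is set, so lane 0 is 10.0; lanes 1..4 are zeroed.
///     let v = _mm_mask_load_ss(src, 0b1, &x.0);
/// }
/// ```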
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    let mut dst: __m128 = src;
    asm!(
        vpl!("vmovss {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
    let mut dst: __m128;
    asm!(
        vpl!("vmovss {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    let mut dst: __m128d = src;
    asm!(
        vpl!("vmovsd {dst}{{{k}}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = inout(xmm_reg) dst,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
/// may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
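///
/// # Examples
///
/// A minimal sketch (marked `ignore`; requires AVX-512F), again using a
/// hypothetical `Aligned` wrapper for the 16-byte alignment requirement:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned(f64);
///
/// let x = Aligned(6.5);
/// // Mask bit 0 is clear, so both lanes of the result are zero.
/// let v = unsafe { _mm_maskz_load_sd(0b0, &x.0) };
/// ```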
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    let mut dst: __m128d;
    asm!(
        vpl!("vmovsd {dst}{{{k}}} {{z}}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        dst = out(xmm_reg) dst,
        options(pure, readonly, nostack, preserves_flags),
    );
    dst
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
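///
/// # Examples
///
/// A minimal sketch of the tail-store pattern (marked `ignore`; requires
/// AVX-512F). Only the lanes selected by k are written, so nothing is stored
/// past the end of the short output slice:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut out = [0i32; 5];
/// let k: __mmask16 = (1u16 << out.len()) - 1; // low five bits set
/// unsafe {
///     let v = _mm512_set1_epi32(42);
///     // Writes lanes 0..5 of `v` to `out`; no other bytes are touched.
///     _mm512_mask_storeu_epi32(out.as_mut_ptr(), k, v);
/// }
/// assert_eq!(out, [42; 5]);
/// ```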
37263#[inline]
37264#[target_feature(enable = "avx512f")]
37265#[cfg_attr(test, assert_instr(vmovdqu32))]
37266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37267#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37268pub const unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
37269    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
37270    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x16());
37271}
37272
37273/// Store packed 64-bit integers from a into memory using writemask k.
37274/// mem_addr does not need to be aligned on any particular boundary.
37275///
37276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
37277#[inline]
37278#[target_feature(enable = "avx512f")]
37279#[cfg_attr(test, assert_instr(vmovdqu64))]
37280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37281#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37282pub const unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
37283    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
37284    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x8());
37285}
37286
37287/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37288/// mem_addr does not need to be aligned on any particular boundary.
37289///
37290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
37291#[inline]
37292#[target_feature(enable = "avx512f")]
37293#[cfg_attr(test, assert_instr(vmovups))]
37294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37295#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37296pub const unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
37297    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
37298    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x16());
37299}
37300
37301/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
37302/// mem_addr does not need to be aligned on any particular boundary.
37303///
37304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
37305#[inline]
37306#[target_feature(enable = "avx512f")]
37307#[cfg_attr(test, assert_instr(vmovupd))]
37308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37309#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37310pub const unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
37311    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
37312    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x8());
37313}
37314
37315/// Store packed 32-bit integers from a into memory using writemask k.
37316/// mem_addr does not need to be aligned on any particular boundary.
37317///
37318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
37319#[inline]
37320#[target_feature(enable = "avx512f,avx512vl")]
37321#[cfg_attr(test, assert_instr(vmovdqu32))]
37322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37323#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37324pub const unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
37325    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
37326    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8());
37327}
37328
37329/// Store packed 64-bit integers from a into memory using writemask k.
37330/// mem_addr does not need to be aligned on any particular boundary.
37331///
37332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
37333#[inline]
37334#[target_feature(enable = "avx512f,avx512vl")]
37335#[cfg_attr(test, assert_instr(vmovdqu64))]
37336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37337#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37338pub const unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
37339    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
37340    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4());
37341}
37342
37343/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
37344/// mem_addr does not need to be aligned on any particular boundary.
37345///
37346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
37347#[inline]
37348#[target_feature(enable = "avx512f,avx512vl")]
37349#[cfg_attr(test, assert_instr(vmovups))]
37350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37351#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
37352pub const unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
37353    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
37354    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x8());
37355}
37356
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x4());
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4());
}

/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2());
}

/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x4());
}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x2());
}

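// A minimal usage sketch for the masked unaligned stores above (names such as
// `buf` are illustrative, not part of the API). Only the lanes whose mask bit
// is set are written; the rest of the destination memory is left untouched,
// and unlike the aligned `_mm*_mask_store_*` family below, `mem_addr` may be
// arbitrarily aligned. Assumes the caller has verified `avx512f`/`avx512vl`
// support at runtime (e.g. via `is_x86_feature_detected!`):
//
//     let mut buf = [0.0f64; 4];
//     unsafe {
//         let v = _mm256_set_pd(4.0, 3.0, 2.0, 1.0); // lanes: [1.0, 2.0, 3.0, 4.0]
//         // mask 0b0101 writes lanes 0 and 2 only
//         _mm256_mask_storeu_pd(buf.as_mut_ptr(), 0b0101, v);
//     }
//     assert_eq!(buf, [1.0, 0.0, 3.0, 0.0]);
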
/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x16());
}

/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x8());
}

/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x16());
}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x8());
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x8());
}

/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x4());
}

/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x8());
}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x4());
}

/// Store packed 32-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x4());
}

/// Store packed 64-bit integers from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x2());
}

/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x4());
}

/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x2());
}

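// Sketch of the alignment contract for the aligned variants above
// (illustrative only; `Aligned64` is not part of this crate): the required
// alignment equals the vector width, so the 512-bit stores need a
// 64-byte-aligned destination. One way to guarantee that in Rust is an
// over-aligned wrapper type:
//
//     #[repr(align(64))]
//     struct Aligned64([i32; 16]);
//
//     let mut out = Aligned64([0; 16]);
//     unsafe {
//         let v = _mm512_set1_epi32(7);
//         // write only the low eight lanes
//         _mm512_mask_store_epi32(out.0.as_mut_ptr(), 0x00ff, v);
//     }
//     assert!(out.0[..8].iter().all(|&x| x == 7));
//     assert!(out.0[8..].iter().all(|&x| x == 0));
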
/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ss)
#[inline]
#[cfg_attr(test, assert_instr(vmovss))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
    asm!(
        vps!("vmovss", "{{{k}}}, {a}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}

/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_sd)
#[inline]
#[cfg_attr(test, assert_instr(vmovsd))]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
    asm!(
        vps!("vmovsd", "{{{k}}}, {a}"),
        p = in(reg) mem_addr,
        k = in(kreg) k,
        a = in(xmm_reg) a,
        options(nostack, preserves_flags),
    );
}

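// The two scalar stores above are emitted with inline `asm!` (the EVEX-masked
// `vmovss`/`vmovsd` forms) rather than being lowered through a generic masked
// store. Behaviorally, the low element is written only when mask bit 0 is
// set. A minimal sketch honoring the documented 16-byte alignment (the
// `AlignedF32` wrapper is illustrative, not part of the API):
//
//     #[repr(align(16))]
//     struct AlignedF32(f32);
//
//     let mut x = AlignedF32(0.0);
//     unsafe {
//         let v = _mm_set_ss(3.5);
//         _mm_mask_store_ss(&mut x.0, 0b1, v); // bit 0 set: writes 3.5
//         _mm_mask_store_ss(&mut x.0, 0b0, v); // bit 0 clear: memory untouched
//     }
//     assert_eq!(x.0, 3.5);
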
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi32(
    src: __m512i,
    k: __mmask16,
    mem_addr: *const i32,
) -> __m512i {
    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
}

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi32(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m256i {
    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
}

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi32(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i32,
) -> __m128i {
    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
}

/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
}

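// The expand loads read *contiguous* elements from memory and scatter them
// into the active lanes, the inverse of a compress store. A worked sketch
// with illustrative data: given memory [10, 20, 30, 40] and mask 0b1010,
// lanes 1 and 3 are active, so they receive the first two memory values in
// order, while the inactive lanes fall back to `src` (or zero for `maskz`):
//
//     let mem = [10i32, 20, 30, 40];
//     unsafe {
//         let r = _mm_maskz_expandloadu_epi32(0b1010, mem.as_ptr());
//         let mut out = [0i32; 4];
//         _mm_storeu_epi32(out.as_mut_ptr(), r);
//         assert_eq!(out, [0, 10, 0, 20]);
//     }
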
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_epi64(
    src: __m512i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m512i {
    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_epi64(
    src: __m256i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m256i {
    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_epi64(
    src: __m128i,
    k: __mmask8,
    mem_addr: *const i64,
) -> __m128i {
    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
}

/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_ps(
    src: __m512,
    k: __mmask16,
    mem_addr: *const f32,
) -> __m512 {
    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
}

/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_expandloadu_pd(
    src: __m512d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m512d {
    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_expandloadu_pd(
    src: __m256d,
    k: __mmask8,
    mem_addr: *const f64,
) -> __m256d {
    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
}

/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
}

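// The floating-point expand loads follow the same contiguous-read pattern as
// the integer ones; architecturally, only as many contiguous elements as
// there are set mask bits are read (fault suppression applies to the rest).
// An illustrative sketch: with two active bits, only the first two doubles
// are consumed from memory:
//
//     let mem = [1.5f64, 2.5, 3.5, 4.5];
//     unsafe {
//         let r = _mm256_maskz_expandloadu_pd(0b0110, mem.as_ptr());
//         let mut out = [0.0f64; 4];
//         _mm256_storeu_pd(out.as_mut_ptr(), r);
//         assert_eq!(out, [0.0, 1.5, 2.5, 0.0]);
//     }
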
/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_setr_pd(
    e0: f64,
    e1: f64,
    e2: f64,
    e3: f64,
    e4: f64,
    e5: f64,
    e6: f64,
    e7: f64,
) -> __m512d {
    unsafe {
        let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
    }
}

/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_set_pd(
    e0: f64,
    e1: f64,
    e2: f64,
    e3: f64,
    e4: f64,
    e5: f64,
    e6: f64,
    e7: f64,
) -> __m512d {
    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
}

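// Ordering note (illustrative): `_mm512_set_pd` takes its arguments
// highest-lane-first, while `_mm512_setr_pd` ("r" for reversed, i.e. memory
// order) takes them lowest-lane-first, so these two calls build the same
// vector:
//
//     unsafe {
//         let a = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
//         let b = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//         // both place 0.0 in lane 0 and 7.0 in lane 7
//     }
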
/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_move_ss&expand=3832)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut mov: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_move_ss&expand=3833)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut mov: f32 = 0.;
        if (k & 0b00000001) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_move_sd&expand=3829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut mov: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_move_sd&expand=3830)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut mov: f64 = 0.;
        if (k & 0b00000001) != 0 {
            mov = simd_extract!(b, 0);
        }
        simd_insert!(a, 0, mov)
    }
}

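// Sketch of the masked scalar move (illustrative values): lane 0 comes from
// `b`, `src`, or zero depending on mask bit 0, and the upper lanes always
// come from `a`:
//
//     unsafe {
//         let src = _mm_set_ss(9.0);
//         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
//         let b = _mm_set_ss(5.0);
//         let r = _mm_mask_move_ss(src, 0b0, a, b);
//         // mask bit 0 clear: lane 0 is taken from src
//         assert_eq!(_mm_cvtss_f32(r), 9.0);
//     }
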
/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ss&expand=159)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut add: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            add = extracta + extractb;
        }
        simd_insert!(a, 0, add)
    }
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ss&expand=160)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut add: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            add = extracta + extractb;
        }
        simd_insert!(a, 0, add)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_sd&expand=155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut add: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            add = extracta + extractb;
        }
        simd_insert!(a, 0, add)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_sd&expand=156)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut add: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            add = extracta + extractb;
        }
        simd_insert!(a, 0, add)
    }
}

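// The masked scalar arithmetic here (add above, and the sub/mul/div variants
// that follow) all share one shape: the operation runs on lane 0 only when
// mask bit 0 is set, otherwise lane 0 falls back to `src` (writemask) or 0.0
// (zeromask), and lanes 1.. of `a` pass through unchanged. Illustrative
// sketch:
//
//     unsafe {
//         let a = _mm_set_ss(2.0);
//         let b = _mm_set_ss(3.0);
//         let masked_off = _mm_maskz_add_ss(0b0, a, b);
//         let masked_on = _mm_maskz_add_ss(0b1, a, b);
//         assert_eq!(_mm_cvtss_f32(masked_off), 0.0);
//         assert_eq!(_mm_cvtss_f32(masked_on), 5.0);
//     }
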
/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ss&expand=5750)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut sub: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ss&expand=5751)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut sub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_sd&expand=5746)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut sub: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_sd&expand=5747)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut sub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ss&expand=3950)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut mul: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ss&expand=3951)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut mul: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_sd&expand=3947)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut mul: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_sd&expand=3948)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut mul: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
    }
}

/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ss&expand=2181)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut div: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}

/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ss&expand=2182)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mut div: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}

/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_sd&expand=2178)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut div: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}

/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_sd&expand=2179)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivsd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mut div: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
    }
}

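// One consequence of the branch in the masked divides above, worth noting:
// when mask bit 0 is clear the division is never evaluated at all, so a
// masked-off divide by zero simply yields the fallback value. Illustrative
// sketch:
//
//     unsafe {
//         let a = _mm_set_ss(1.0);
//         let zero = _mm_set_ss(0.0);
//         // lane 0 is masked off, so 1.0 / 0.0 is never computed
//         let r = _mm_maskz_div_ss(0b0, a, zero);
//         assert_eq!(_mm_cvtss_f32(r), 0.0);
//     }
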
/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ss&expand=3672)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vmaxss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ss&expand=3673)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vmaxss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_sd&expand=3669)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vmaxsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_sd&expand=3670)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vmaxsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

38452/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38453///
38454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
38455#[inline]
38456#[target_feature(enable = "avx512f")]
38457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38458#[cfg_attr(test, assert_instr(vminss))]
38459pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
38460    unsafe {
38461        transmute(vminss(
38462            a.as_f32x4(),
38463            b.as_f32x4(),
38464            src.as_f32x4(),
38465            k,
38466            _MM_FROUND_CUR_DIRECTION,
38467        ))
38468    }
38469}
38470
38471/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
38472///
38473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
38474#[inline]
38475#[target_feature(enable = "avx512f")]
38476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38477#[cfg_attr(test, assert_instr(vminss))]
38478pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38479    unsafe {
38480        transmute(vminss(
38481            a.as_f32x4(),
38482            b.as_f32x4(),
38483            f32x4::ZERO,
38484            k,
38485            _MM_FROUND_CUR_DIRECTION,
38486        ))
38487    }
38488}
38489
38490/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38491///
38492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
38493#[inline]
38494#[target_feature(enable = "avx512f")]
38495#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38496#[cfg_attr(test, assert_instr(vminsd))]
38497pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38498    unsafe {
38499        transmute(vminsd(
38500            a.as_f64x2(),
38501            b.as_f64x2(),
38502            src.as_f64x2(),
38503            k,
38504            _MM_FROUND_CUR_DIRECTION,
38505        ))
38506    }
38507}
38508
38509/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
38510///
38511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
38512#[inline]
38513#[target_feature(enable = "avx512f")]
38514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38515#[cfg_attr(test, assert_instr(vminsd))]
38516pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38517    unsafe {
38518        transmute(vminsd(
38519            a.as_f64x2(),
38520            b.as_f64x2(),
38521            f64x2::ZERO,
38522            k,
38523            _MM_FROUND_CUR_DIRECTION,
38524        ))
38525    }
38526}
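
// Editor's note: a minimal sketch (hypothetical helper, not part of the stdarch
// test suite) of the masked scalar min/max semantics above, assuming AVX-512F
// is available at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_max_min_ss() {
    let a = _mm_set_ps(7.0, 6.0, 5.0, 1.0); // low lane = 1.0
    let b = _mm_set_ss(4.0); // low lane = 4.0
    // Mask bit 0 set: low lane = max(1.0, 4.0); the upper lanes come from `a`.
    assert_eq!(_mm_cvtss_f32(_mm_maskz_max_ss(0b1, a, b)), 4.0);
    // Mask bit 0 clear under a zeromask: the low lane is zeroed.
    assert_eq!(_mm_cvtss_f32(_mm_maskz_max_ss(0b0, a, b)), 0.0);
    // The min counterpart picks the smaller low lane instead.
    assert_eq!(_mm_cvtss_f32(_mm_maskz_min_ss(0b1, a, b)), 1.0);
}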

/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
}

/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
}

/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
}

/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
}
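
// Editor's note: a minimal sketch (hypothetical helper, not part of the stdarch
// test suite) of the masked scalar square root above, assuming AVX-512F support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_mask_sqrt_ss() {
    let a = _mm_set_ps(3.0, 3.0, 3.0, 9.0); // the upper lanes are copied to dst
    let b = _mm_set_ss(16.0); // the square root is taken from `b`'s low lane
    assert_eq!(_mm_cvtss_f32(_mm_maskz_sqrt_ss(0b1, a, b)), 4.0);
    // With mask bit 0 clear, the zeromask variant yields 0.0 in the low lane.
    assert_eq!(_mm_cvtss_f32(_mm_maskz_sqrt_ss(0b0, a, b)), 0.0);
}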

/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
}

/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
}

/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
}

/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
}
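
// Editor's note: a minimal sketch (hypothetical helper, not part of the stdarch
// test suite) of the reciprocal square root approximation above. The result is
// only guaranteed to a relative error below 2^-14 (about 6.1e-5), so it is
// compared with a tolerance rather than exactly.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_rsqrt14_sd() {
    let a = _mm_set_sd(0.0);
    let b = _mm_set_sd(4.0); // 1 / sqrt(4.0) = 0.5
    let r = _mm_cvtsd_f64(_mm_rsqrt14_sd(a, b));
    assert!((r - 0.5).abs() <= 0.5 * 1e-4); // 2^-14 relative error, with margin
}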

/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
}

/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
}

/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
}

/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
}

/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
}

/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
}
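
// Editor's note: a minimal sketch (hypothetical helper, not part of the stdarch
// test suite) of the reciprocal approximation above; as with rsqrt14, the
// relative error is bounded by 2^-14, so an exact comparison would be too
// strict.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_rcp14_ss() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(4.0); // 1 / 4.0 = 0.25
    let r = _mm_cvtss_f32(_mm_rcp14_ss(a, b));
    assert!((r - 0.25).abs() <= 0.25 * 1e-4);
}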

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            0b1,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b1,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}

/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_NO_EXC,
        ))
    }
}
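
// Editor's note: a minimal sketch (hypothetical helper, not part of the stdarch
// test suite) of the getexp semantics above: the result is floor(log2(|b[0]|))
// as a float, which is exact for these inputs.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_getexp_ss() {
    let a = _mm_set_ss(0.0);
    assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, _mm_set_ss(8.0))), 3.0);
    assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, _mm_set_ss(0.5))), -1.0);
}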

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetmantss(
            a,
            b,
            SIGN << 2 | NORM,
            f32x4::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_getmant_ss<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_maskz_getmant_ss<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vgetmantss(
            a,
            b,
            SIGN << 2 | NORM,
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(4, 5)]
pub fn _mm_mask_getmant_sd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
///    _MM_MANT_NORM_1_2     // interval [1, 2)\
///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
///    _MM_MANT_SIGN_src     // sign = sign(src)\
///    _MM_MANT_SIGN_zero    // sign = 0\
///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
#[rustc_legacy_const_generics(3, 4)]
pub fn _mm_maskz_getmant_sd<
    const NORM: _MM_MANTISSA_NORM_ENUM,
    const SIGN: _MM_MANTISSA_SIGN_ENUM,
>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(NORM, 4);
        static_assert_uimm_bits!(SIGN, 2);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}
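
// Editor's note: a minimal sketch (hypothetical helper, not part of the stdarch
// test suite) of getmant above, using stdarch's Rust spellings of the Intel
// constants: 10.0 = 1.25 * 2^3, so normalizing the mantissa to [1, 2) with the
// source sign yields exactly 1.25.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_getmant_ss() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(10.0);
    let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 1.25);
}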

/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vrndscaless(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vrndscalesd(
            a,
            b,
            f64x2::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
    }
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}

/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
/// * [`_MM_FROUND_TO_NEG_INF`] : round down
/// * [`_MM_FROUND_TO_POS_INF`] : round up
/// * [`_MM_FROUND_TO_ZERO`] : truncate
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 8);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
    }
}
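
// Editor's note: a minimal sketch (hypothetical helper, not part of the stdarch
// test suite) of roundscale above. IMM8 = _MM_FROUND_TO_ZERO requests zero
// fraction bits (imm8[7:4] = 0) with truncation as the rounding mode (imm8[2:0]).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_roundscale_ss() {
    let a = _mm_set_ss(0.0);
    let b = _mm_set_ss(2.7);
    let r = _mm_roundscale_ss::<_MM_FROUND_TO_ZERO>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 2.0); // 2.7 truncated toward zero
}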

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
    unsafe {
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        transmute(vscalefss(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        transmute(vscalefss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
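
// Editor's note: a minimal sketch (hypothetical helper, not part of the stdarch
// test suite) of scalef above: the low lane becomes a[0] * 2^floor(b[0]).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn _example_scalef_ss() {
    let a = _mm_set_ss(3.0);
    let b = _mm_set_ss(2.0);
    assert_eq!(_mm_cvtss_f32(_mm_scalef_ss(a, b)), 12.0); // 3.0 * 2^2
}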

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fmadd: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(fmadd, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fmadd: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe {
        let mut fmadd: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            fmadd = fmaf32(extracta, extractb, fmadd);
        }
        simd_insert!(c, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fmadd: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(fmadd, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fmadd: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe {
        let mut fmadd: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            fmadd = fmaf64(extracta, extractb, fmadd);
        }
        simd_insert!(c, 0, fmadd)
    }
}
39462
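// Illustrative sketch (hypothetical helper, not part of the upstream crate):
// shows how the writemask, zeromask, and mask3 flavors of the scalar fmadd
// intrinsics above choose the fallback for lane 0. Callers must guarantee the
// CPU supports AVX-512F.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmadd_ss_mask_demo() {
    unsafe {
        let a = _mm_set_ss(2.0);
        let b = _mm_set_ss(3.0);
        let c = _mm_set_ss(4.0);
        // Mask bit 0 set: lane 0 = fma(2.0, 3.0, 4.0) = 10.0 for every flavor.
        assert_eq!(_mm_cvtss_f32(_mm_mask_fmadd_ss(a, 0b1, b, c)), 10.0);
        // Mask bit 0 clear: the writemask flavor keeps `a`'s lane 0, the
        // zeromask flavor zeroes it, and the mask3 flavor keeps `c`'s lane 0.
        assert_eq!(_mm_cvtss_f32(_mm_mask_fmadd_ss(a, 0b0, b, c)), 2.0);
        assert_eq!(_mm_cvtss_f32(_mm_maskz_fmadd_ss(0b0, a, b, c)), 0.0);
        assert_eq!(_mm_cvtss_f32(_mm_mask3_fmadd_ss(a, b, c, 0b0)), 4.0);
    }
}
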
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fmsub: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = fmaf32(fmsub, extractb, extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fmsub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe {
        let mut fmsub: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc = -fmsub;
            fmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(c, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fmsub: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = fmaf64(fmsub, extractb, extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fmsub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe {
        let mut fmsub: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc = -fmsub;
            fmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(c, 0, fmsub)
    }
}

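// Minimal sketch (hypothetical, not upstream code): the fmsub flavors above are
// fused multiply-adds with a negated addend, i.e. fma(a, b, -c), so with the
// mask bit set lane 0 becomes 2.0 * 3.0 - 4.0 = 2.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fmsub_sd_mask_demo() {
    unsafe {
        let a = _mm_set_sd(2.0);
        let b = _mm_set_sd(3.0);
        let c = _mm_set_sd(4.0);
        assert_eq!(_mm_cvtsd_f64(_mm_mask_fmsub_sd(a, 0b1, b, c)), 2.0);
        // Mask bit 0 clear: the zeromask flavor zeroes lane 0.
        assert_eq!(_mm_cvtsd_f64(_mm_maskz_fmsub_sd(0b0, a, b, c)), 0.0);
    }
}
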
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fnmadd: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmadd;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fnmadd: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe {
        let mut fnmadd: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            fnmadd = fmaf32(extracta, extractb, fnmadd);
        }
        simd_insert!(c, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fnmadd: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmadd;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fnmadd: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe {
        let mut fnmadd: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            fnmadd = fmaf64(extracta, extractb, fnmadd);
        }
        simd_insert!(c, 0, fnmadd)
    }
}

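// Minimal sketch (hypothetical, not upstream code): fnmadd negates the product
// before the add, i.e. fma(-a, b, c), so lane 0 becomes -(2.0 * 3.0) + 4.0 = -2.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fnmadd_ss_mask_demo() {
    unsafe {
        let a = _mm_set_ss(2.0);
        let b = _mm_set_ss(3.0);
        let c = _mm_set_ss(4.0);
        assert_eq!(_mm_cvtss_f32(_mm_mask_fnmadd_ss(a, 0b1, b, c)), -2.0);
        // Mask bit 0 clear: the writemask flavor passes `a`'s lane 0 through.
        assert_eq!(_mm_cvtss_f32(_mm_mask_fnmadd_ss(a, 0b0, b, c)), 2.0);
    }
}
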
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fnmsub: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmsub;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        let mut fnmsub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe {
        let mut fnmsub: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc = -fnmsub;
            fnmsub = fmaf32(extracta, extractb, extractc);
        }
        simd_insert!(c, 0, fnmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fnmsub: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmsub;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        let mut fnmsub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(a, 0, fnmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmsub))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe {
        let mut fnmsub: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc = -fnmsub;
            fnmsub = fmaf64(extracta, extractb, extractc);
        }
        simd_insert!(c, 0, fnmsub)
    }
}

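// Minimal sketch (hypothetical, not upstream code): fnmsub negates both the
// product and the addend, i.e. fma(-a, b, -c), so lane 0 becomes
// -(2.0 * 3.0) - 4.0 = -10.0; the mask3 flavor falls back to `c`'s lane 0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn fnmsub_sd_mask_demo() {
    unsafe {
        let a = _mm_set_sd(2.0);
        let b = _mm_set_sd(3.0);
        let c = _mm_set_sd(4.0);
        assert_eq!(_mm_cvtsd_f64(_mm_mask_fnmsub_sd(a, 0b1, b, c)), -10.0);
        assert_eq!(_mm_cvtsd_f64(_mm_mask3_fnmsub_sd(a, b, c, 0b0)), 4.0);
    }
}
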
/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vaddss(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vaddsd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
    }
}

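// Usage sketch (hypothetical, not upstream code): the rounding mode is a const
// generic, so it is spelled at the call site via the rounding-control constants.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn add_round_ss_demo() {
    unsafe {
        let a = _mm_set_ss(1.5);
        let b = _mm_set_ss(2.25);
        // 1.5 + 2.25 is exact, so every rounding mode yields 3.75.
        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        assert_eq!(_mm_cvtss_f32(r), 3.75);
    }
}
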
/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vsubss(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vsubsd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
    }
}

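// Sketch of directed rounding (hypothetical, not upstream code): 1.0 - 2^-25 is
// not representable in f32, so the result depends on the selected mode.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn sub_round_ss_demo() {
    unsafe {
        let a = _mm_set_ss(1.0);
        let b = _mm_set_ss(f32::from_bits(0x3300_0000)); // 2^-25
        // Rounding toward -inf lands on the largest float below 1.0 ...
        let down = _mm_sub_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
        assert_eq!(_mm_cvtss_f32(down), 1.0 - f32::EPSILON / 2.0);
        // ... while round-to-nearest (ties to even) rounds back up to 1.0.
        let near = _mm_sub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        assert_eq!(_mm_cvtss_f32(near), 1.0);
    }
}
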
/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vmulss(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vmulsd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
    }
}

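// Sketch combining a writemask with an explicit rounding mode (hypothetical,
// not upstream code): with mask bit 0 clear, lane 0 comes from `src`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mul_round_sd_mask_demo() {
    unsafe {
        let src = _mm_set_sd(-1.0);
        let a = _mm_set_sd(2.0);
        let b = _mm_set_sd(3.0);
        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0b0, a, b,
        );
        assert_eq!(_mm_cvtsd_f64(r), -1.0);
    }
}
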
40339/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40340///
40341/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40342/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40343/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40344/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40345/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40346/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40347///
40348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
40349#[inline]
40350#[target_feature(enable = "avx512f")]
40351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40352#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40353#[rustc_legacy_const_generics(2)]
40354pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40355    unsafe {
40356        static_assert_rounding!(ROUNDING);
40357        let a = a.as_f32x4();
40358        let b = b.as_f32x4();
40359        let r = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
40360        transmute(r)
40361    }
40362}
40363
40364/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40365///
40366/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40367/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40368/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40369/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40370/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40371/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40372///
40373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
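///
/// A minimal writemask sketch (values are illustrative assumptions):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// # use core::arch::x86_64::*;
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime.
///     unsafe {
///         let src = _mm_set_ss(42.0);
///         let a = _mm_set_ss(1.0);
///         let b = _mm_set_ss(4.0);
///         // Mask bit 0 clear: the lower element comes from `src`, not 1.0 / 4.0.
///         let r = _mm_mask_div_round_ss(src, 0b0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///         assert_eq!(_mm_cvtss_f32(r), 42.0);
///     }
/// }
/// # }
/// ```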
40374#[inline]
40375#[target_feature(enable = "avx512f")]
40376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40377#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40378#[rustc_legacy_const_generics(4)]
40379pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
40380    src: __m128,
40381    k: __mmask8,
40382    a: __m128,
40383    b: __m128,
40384) -> __m128 {
40385    unsafe {
40386        static_assert_rounding!(ROUNDING);
40387        let a = a.as_f32x4();
40388        let b = b.as_f32x4();
40389        let src = src.as_f32x4();
40390        let r = vdivss(a, b, src, k, ROUNDING);
40391        transmute(r)
40392    }
40393}
40394
40395/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40396///
40397/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40398/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40399/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40400/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40401/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40402/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40403///
40404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
40405#[inline]
40406#[target_feature(enable = "avx512f")]
40407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40408#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
40409#[rustc_legacy_const_generics(3)]
40410pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40411    unsafe {
40412        static_assert_rounding!(ROUNDING);
40413        let a = a.as_f32x4();
40414        let b = b.as_f32x4();
40415        let r = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
40416        transmute(r)
40417    }
40418}
40419
40420/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40421///
40422/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40423/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40424/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40425/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40426/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40427/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40428///
40429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
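///
/// A brief sketch (illustrative values, not from Intel's documentation):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// # use core::arch::x86_64::*;
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime.
///     unsafe {
///         let a = _mm_set_pd(7.0, 1.0); // upper = 7.0, lower = 1.0
///         let b = _mm_set_sd(4.0);
///         // Lower element: 1.0 / 4.0; the upper element is copied from `a`.
///         let r = _mm_div_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///         assert_eq!(_mm_cvtsd_f64(r), 0.25);
///     }
/// }
/// # }
/// ```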
40430#[inline]
40431#[target_feature(enable = "avx512f")]
40432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40433#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40434#[rustc_legacy_const_generics(2)]
40435pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40436    unsafe {
40437        static_assert_rounding!(ROUNDING);
40438        let a = a.as_f64x2();
40439        let b = b.as_f64x2();
40440        let r = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
40441        transmute(r)
40442    }
40443}
40444
40445/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40446///
40447/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40448/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40449/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40450/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40451/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40452/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40453///
40454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
40455#[inline]
40456#[target_feature(enable = "avx512f")]
40457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40458#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40459#[rustc_legacy_const_generics(4)]
40460pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
40461    src: __m128d,
40462    k: __mmask8,
40463    a: __m128d,
40464    b: __m128d,
40465) -> __m128d {
40466    unsafe {
40467        static_assert_rounding!(ROUNDING);
40468        let a = a.as_f64x2();
40469        let b = b.as_f64x2();
40470        let src = src.as_f64x2();
40471        let r = vdivsd(a, b, src, k, ROUNDING);
40472        transmute(r)
40473    }
40474}
40475
40476/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40477///
40478/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40479/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40480/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40481/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40482/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40483/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40484///
40485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
40486#[inline]
40487#[target_feature(enable = "avx512f")]
40488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40489#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
40490#[rustc_legacy_const_generics(3)]
40491pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40492    unsafe {
40493        static_assert_rounding!(ROUNDING);
40494        let a = a.as_f64x2();
40495        let b = b.as_f64x2();
40496        let r = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
40497        transmute(r)
40498    }
40499}
40500
40501/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40502/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40503///
40504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
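///
/// A minimal sketch (illustrative values; `_MM_FROUND_NO_EXC` only suppresses
/// exception reporting, it does not change the comparison):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// # use core::arch::x86_64::*;
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime.
///     unsafe {
///         let a = _mm_set_ss(2.0);
///         let b = _mm_set_ss(5.0);
///         let r = _mm_max_round_ss(a, b, _MM_FROUND_NO_EXC);
///         assert_eq!(_mm_cvtss_f32(r), 5.0);
///     }
/// }
/// # }
/// ```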
40505#[inline]
40506#[target_feature(enable = "avx512f")]
40507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40508#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40509#[rustc_legacy_const_generics(2)]
40510pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40511    unsafe {
40512        static_assert_sae!(SAE);
40513        let a = a.as_f32x4();
40514        let b = b.as_f32x4();
40515        let r = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
40516        transmute(r)
40517    }
40518}
40519
40520/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40521/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40522///
40523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
40524#[inline]
40525#[target_feature(enable = "avx512f")]
40526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40527#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40528#[rustc_legacy_const_generics(4)]
40529pub fn _mm_mask_max_round_ss<const SAE: i32>(
40530    src: __m128,
40531    k: __mmask8,
40532    a: __m128,
40533    b: __m128,
40534) -> __m128 {
40535    unsafe {
40536        static_assert_sae!(SAE);
40537        let a = a.as_f32x4();
40538        let b = b.as_f32x4();
40539        let src = src.as_f32x4();
40540        let r = vmaxss(a, b, src, k, SAE);
40541        transmute(r)
40542    }
40543}
40544
40545/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40546/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40547///
40548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
40549#[inline]
40550#[target_feature(enable = "avx512f")]
40551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40552#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
40553#[rustc_legacy_const_generics(3)]
40554pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40555    unsafe {
40556        static_assert_sae!(SAE);
40557        let a = a.as_f32x4();
40558        let b = b.as_f32x4();
40559        let r = vmaxss(a, b, f32x4::ZERO, k, SAE);
40560        transmute(r)
40561    }
40562}
40563
40564/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40565/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40566///
40567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
40568#[inline]
40569#[target_feature(enable = "avx512f")]
40570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40571#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40572#[rustc_legacy_const_generics(2)]
40573pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40574    unsafe {
40575        static_assert_sae!(SAE);
40576        let a = a.as_f64x2();
40577        let b = b.as_f64x2();
40578        let r = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
40579        transmute(r)
40580    }
40581}
40582
40583/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40584/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40585///
40586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
40587#[inline]
40588#[target_feature(enable = "avx512f")]
40589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40590#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40591#[rustc_legacy_const_generics(4)]
40592pub fn _mm_mask_max_round_sd<const SAE: i32>(
40593    src: __m128d,
40594    k: __mmask8,
40595    a: __m128d,
40596    b: __m128d,
40597) -> __m128d {
40598    unsafe {
40599        static_assert_sae!(SAE);
40600        let a = a.as_f64x2();
40601        let b = b.as_f64x2();
40602        let src = src.as_f64x2();
40603        let r = vmaxsd(a, b, src, k, SAE);
40604        transmute(r)
40605    }
40606}
40607
40608/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40609/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40610///
40611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
40612#[inline]
40613#[target_feature(enable = "avx512f")]
40614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40615#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
40616#[rustc_legacy_const_generics(3)]
40617pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40618    unsafe {
40619        static_assert_sae!(SAE);
40620        let a = a.as_f64x2();
40621        let b = b.as_f64x2();
40622        let r = vmaxsd(a, b, f64x2::ZERO, k, SAE);
40623        transmute(r)
40624    }
40625}
40626
40627/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40628/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40629///
40630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
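///
/// A minimal sketch (illustrative values; `_MM_FROUND_CUR_DIRECTION` is the
/// other accepted sae value besides `_MM_FROUND_NO_EXC`):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// # use core::arch::x86_64::*;
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime.
///     unsafe {
///         let a = _mm_set_ss(2.0);
///         let b = _mm_set_ss(5.0);
///         let r = _mm_min_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
///         assert_eq!(_mm_cvtss_f32(r), 2.0);
///     }
/// }
/// # }
/// ```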
40631#[inline]
40632#[target_feature(enable = "avx512f")]
40633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40634#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40635#[rustc_legacy_const_generics(2)]
40636pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40637    unsafe {
40638        static_assert_sae!(SAE);
40639        let a = a.as_f32x4();
40640        let b = b.as_f32x4();
40641        let r = vminss(a, b, f32x4::ZERO, 0b1, SAE);
40642        transmute(r)
40643    }
40644}
40645
40646/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40647/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40648///
40649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
40650#[inline]
40651#[target_feature(enable = "avx512f")]
40652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40653#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40654#[rustc_legacy_const_generics(4)]
40655pub fn _mm_mask_min_round_ss<const SAE: i32>(
40656    src: __m128,
40657    k: __mmask8,
40658    a: __m128,
40659    b: __m128,
40660) -> __m128 {
40661    unsafe {
40662        static_assert_sae!(SAE);
40663        let a = a.as_f32x4();
40664        let b = b.as_f32x4();
40665        let src = src.as_f32x4();
40666        let r = vminss(a, b, src, k, SAE);
40667        transmute(r)
40668    }
40669}
40670
40671/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40672/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40673///
40674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
40675#[inline]
40676#[target_feature(enable = "avx512f")]
40677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40678#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
40679#[rustc_legacy_const_generics(3)]
40680pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40681    unsafe {
40682        static_assert_sae!(SAE);
40683        let a = a.as_f32x4();
40684        let b = b.as_f32x4();
40685        let r = vminss(a, b, f32x4::ZERO, k, SAE);
40686        transmute(r)
40687    }
40688}
40689
40690/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40691/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40692///
40693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
40694#[inline]
40695#[target_feature(enable = "avx512f")]
40696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40697#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40698#[rustc_legacy_const_generics(2)]
40699pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40700    unsafe {
40701        static_assert_sae!(SAE);
40702        let a = a.as_f64x2();
40703        let b = b.as_f64x2();
40704        let r = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
40705        transmute(r)
40706    }
40707}
40708
40709/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40710/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40711///
40712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
40713#[inline]
40714#[target_feature(enable = "avx512f")]
40715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40716#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40717#[rustc_legacy_const_generics(4)]
40718pub fn _mm_mask_min_round_sd<const SAE: i32>(
40719    src: __m128d,
40720    k: __mmask8,
40721    a: __m128d,
40722    b: __m128d,
40723) -> __m128d {
40724    unsafe {
40725        static_assert_sae!(SAE);
40726        let a = a.as_f64x2();
40727        let b = b.as_f64x2();
40728        let src = src.as_f64x2();
40729        let r = vminsd(a, b, src, k, SAE);
40730        transmute(r)
40731    }
40732}
40733
40734/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40735/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40736///
40737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
40738#[inline]
40739#[target_feature(enable = "avx512f")]
40740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40741#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
40742#[rustc_legacy_const_generics(3)]
40743pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
40744    unsafe {
40745        static_assert_sae!(SAE);
40746        let a = a.as_f64x2();
40747        let b = b.as_f64x2();
40748        let r = vminsd(a, b, f64x2::ZERO, k, SAE);
40749        transmute(r)
40750    }
40751}
40752
40753/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40754///
40755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40761///
40762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
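///
/// A short sketch (illustrative values, not from Intel's documentation):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// # use core::arch::x86_64::*;
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime.
///     unsafe {
///         let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
///         let b = _mm_set_ss(9.0);
///         // Lower element: sqrt(9.0); the upper three elements come from `a`.
///         let r = _mm_sqrt_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///         assert_eq!(_mm_cvtss_f32(r), 3.0);
///     }
/// }
/// # }
/// ```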
40763#[inline]
40764#[target_feature(enable = "avx512f")]
40765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40766#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
40767#[rustc_legacy_const_generics(2)]
40768pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
40769    unsafe {
40770        static_assert_rounding!(ROUNDING);
40771        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
40772    }
40773}
40774
40775/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40776///
40777/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40778/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40779/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40780/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40781/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40782/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40783///
40784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
40785#[inline]
40786#[target_feature(enable = "avx512f")]
40787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40788#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
40789#[rustc_legacy_const_generics(4)]
40790pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
40791    src: __m128,
40792    k: __mmask8,
40793    a: __m128,
40794    b: __m128,
40795) -> __m128 {
40796    unsafe {
40797        static_assert_rounding!(ROUNDING);
40798        vsqrtss(a, b, src, k, ROUNDING)
40799    }
40800}
40801
40802/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40803///
40804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40810///
40811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
40812#[inline]
40813#[target_feature(enable = "avx512f")]
40814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40815#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
40816#[rustc_legacy_const_generics(3)]
40817pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40818    unsafe {
40819        static_assert_rounding!(ROUNDING);
40820        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
40821    }
40822}
40823
40824/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40825///
40826/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40827/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40828/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40829/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40830/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40831/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40832///
40833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
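///
/// A brief sketch (illustrative values, not from Intel's documentation):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// # use core::arch::x86_64::*;
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime.
///     unsafe {
///         let a = _mm_set_pd(8.0, 1.0); // upper = 8.0, lower = 1.0
///         let b = _mm_set_sd(4.0);
///         // Lower element: sqrt(4.0); the upper element is copied from `a`.
///         let r = _mm_sqrt_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
///         assert_eq!(_mm_cvtsd_f64(r), 2.0);
///     }
/// }
/// # }
/// ```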
40834#[inline]
40835#[target_feature(enable = "avx512f")]
40836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40837#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
40838#[rustc_legacy_const_generics(2)]
40839pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
40840    unsafe {
40841        static_assert_rounding!(ROUNDING);
40842        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
40843    }
40844}
40845
40846/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40847///
40848/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40849/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40850/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40851/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40852/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40853/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40854///
40855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
40856#[inline]
40857#[target_feature(enable = "avx512f")]
40858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40859#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
40860#[rustc_legacy_const_generics(4)]
40861pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
40862    src: __m128d,
40863    k: __mmask8,
40864    a: __m128d,
40865    b: __m128d,
40866) -> __m128d {
40867    unsafe {
40868        static_assert_rounding!(ROUNDING);
40869        vsqrtsd(a, b, src, k, ROUNDING)
40870    }
40871}
40872
40873/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40874///
40875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40881///
40882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
40883#[inline]
40884#[target_feature(enable = "avx512f")]
40885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40886#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
40887#[rustc_legacy_const_generics(3)]
40888pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
40889    k: __mmask8,
40890    a: __m128d,
40891    b: __m128d,
40892) -> __m128d {
40893    unsafe {
40894        static_assert_rounding!(ROUNDING);
40895        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
40896    }
40897}
40898
40899/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40900/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40901///
40902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
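///
/// A minimal sketch (illustrative values; for 8.0 the extracted exponent is
/// floor(log2(8.0)) == 3.0):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// # use core::arch::x86_64::*;
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime.
///     unsafe {
///         let a = _mm_set_ss(0.0);
///         let b = _mm_set_ss(8.0);
///         let r = _mm_getexp_round_ss(a, b, _MM_FROUND_NO_EXC);
///         assert_eq!(_mm_cvtss_f32(r), 3.0);
///     }
/// }
/// # }
/// ```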
40903#[inline]
40904#[target_feature(enable = "avx512f")]
40905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40906#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40907#[rustc_legacy_const_generics(2)]
40908pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
40909    unsafe {
40910        static_assert_sae!(SAE);
40911        let a = a.as_f32x4();
40912        let b = b.as_f32x4();
40913        let r = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
40914        transmute(r)
40915    }
40916}
40917
40918/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40919/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40920///
40921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
40922#[inline]
40923#[target_feature(enable = "avx512f")]
40924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40925#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40926#[rustc_legacy_const_generics(4)]
40927pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
40928    src: __m128,
40929    k: __mmask8,
40930    a: __m128,
40931    b: __m128,
40932) -> __m128 {
40933    unsafe {
40934        static_assert_sae!(SAE);
40935        let a = a.as_f32x4();
40936        let b = b.as_f32x4();
40937        let src = src.as_f32x4();
40938        let r = vgetexpss(a, b, src, k, SAE);
40939        transmute(r)
40940    }
40941}
40942
40943/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40944/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40945///
40946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
40947#[inline]
40948#[target_feature(enable = "avx512f")]
40949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40950#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
40951#[rustc_legacy_const_generics(3)]
40952pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
40953    unsafe {
40954        static_assert_sae!(SAE);
40955        let a = a.as_f32x4();
40956        let b = b.as_f32x4();
40957        let r = vgetexpss(a, b, f32x4::ZERO, k, SAE);
40958        transmute(r)
40959    }
40960}
40961
40962/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40963/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40964///
40965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
40966#[inline]
40967#[target_feature(enable = "avx512f")]
40968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40969#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
40970#[rustc_legacy_const_generics(2)]
40971pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
40972    unsafe {
40973        static_assert_sae!(SAE);
40974        let a = a.as_f64x2();
40975        let b = b.as_f64x2();
40976        let r = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
40977        transmute(r)
40978    }
40979}
40980
40981/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
40982/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40983///
40984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
40985#[inline]
40986#[target_feature(enable = "avx512f")]
40987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40988#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
40989#[rustc_legacy_const_generics(4)]
40990pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
40991    src: __m128d,
40992    k: __mmask8,
40993    a: __m128d,
40994    b: __m128d,
40995) -> __m128d {
40996    unsafe {
40997        static_assert_sae!(SAE);
40998        let a = a.as_f64x2();
40999        let b = b.as_f64x2();
41000        let src = src.as_f64x2();
41001        let r = vgetexpsd(a, b, src, k, SAE);
41002        transmute(r)
41003    }
41004}
41005
41006/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
41007/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41008///
41009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
41010#[inline]
41011#[target_feature(enable = "avx512f")]
41012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41013#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
41014#[rustc_legacy_const_generics(3)]
41015pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
41016    unsafe {
41017        static_assert_sae!(SAE);
41018        let a = a.as_f64x2();
41019        let b = b.as_f64x2();
41020        let r = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
41021        transmute(r)
41022    }
41023}
41024
41025/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41026/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41027///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41028///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41029///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41030///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41031/// The sign is determined by sc which can take the following values:\
41032///    _MM_MANT_SIGN_src     // sign = sign(src)\
41033///    _MM_MANT_SIGN_zero    // sign = 0\
41034///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41035/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41036///
41037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
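///
/// A minimal sketch (illustrative values; 12.0 == 1.5 * 2^3, so normalizing
/// the mantissa to the interval [1, 2) yields 1.5):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// # use core::arch::x86_64::*;
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime.
///     unsafe {
///         let a = _mm_set_ss(0.0);
///         let b = _mm_set_ss(12.0);
///         let r = _mm_getmant_round_ss(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_CUR_DIRECTION);
///         assert_eq!(_mm_cvtss_f32(r), 1.5);
///     }
/// }
/// # }
/// ```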
41038#[inline]
41039#[target_feature(enable = "avx512f")]
41040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41041#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41042#[rustc_legacy_const_generics(2, 3, 4)]
41043pub fn _mm_getmant_round_ss<
41044    const NORM: _MM_MANTISSA_NORM_ENUM,
41045    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41046    const SAE: i32,
41047>(
41048    a: __m128,
41049    b: __m128,
41050) -> __m128 {
41051    unsafe {
41052        static_assert_uimm_bits!(NORM, 4);
41053        static_assert_uimm_bits!(SIGN, 2);
41054        static_assert_mantissas_sae!(SAE);
41055        let a = a.as_f32x4();
41056        let b = b.as_f32x4();
41057        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
41058        transmute(r)
41059    }
41060}
41061
41062/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41063/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41064///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41065///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41066///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41067///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41068/// The sign is determined by sc which can take the following values:\
41069///    _MM_MANT_SIGN_src     // sign = sign(src)\
41070///    _MM_MANT_SIGN_zero    // sign = 0\
41071///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41072/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41073///
41074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
41075#[inline]
41076#[target_feature(enable = "avx512f")]
41077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41078#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41079#[rustc_legacy_const_generics(4, 5, 6)]
41080pub fn _mm_mask_getmant_round_ss<
41081    const NORM: _MM_MANTISSA_NORM_ENUM,
41082    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41083    const SAE: i32,
41084>(
41085    src: __m128,
41086    k: __mmask8,
41087    a: __m128,
41088    b: __m128,
41089) -> __m128 {
41090    unsafe {
41091        static_assert_uimm_bits!(NORM, 4);
41092        static_assert_uimm_bits!(SIGN, 2);
41093        static_assert_mantissas_sae!(SAE);
41094        let a = a.as_f32x4();
41095        let b = b.as_f32x4();
41096        let src = src.as_f32x4();
41097        let r = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
41098        transmute(r)
41099    }
41100}
41101
41102/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41103/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41104///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41105///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41106///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41107///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41108/// The sign is determined by sc which can take the following values:\
41109///    _MM_MANT_SIGN_src     // sign = sign(src)\
41110///    _MM_MANT_SIGN_zero    // sign = 0\
41111///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41112/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41113///
41114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
41115#[inline]
41116#[target_feature(enable = "avx512f")]
41117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41118#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
41119#[rustc_legacy_const_generics(3, 4, 5)]
41120pub fn _mm_maskz_getmant_round_ss<
41121    const NORM: _MM_MANTISSA_NORM_ENUM,
41122    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41123    const SAE: i32,
41124>(
41125    k: __mmask8,
41126    a: __m128,
41127    b: __m128,
41128) -> __m128 {
41129    unsafe {
41130        static_assert_uimm_bits!(NORM, 4);
41131        static_assert_uimm_bits!(SIGN, 2);
41132        static_assert_mantissas_sae!(SAE);
41133        let a = a.as_f32x4();
41134        let b = b.as_f32x4();
41135        let r = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
41136        transmute(r)
41137    }
41138}
41139
41140/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41141/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41142///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41143///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41144///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41145///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41146/// The sign is determined by sc which can take the following values:\
41147///    _MM_MANT_SIGN_src     // sign = sign(src)\
41148///    _MM_MANT_SIGN_zero    // sign = 0\
41149///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41150/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41151///
41152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
41153#[inline]
41154#[target_feature(enable = "avx512f")]
41155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41156#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41157#[rustc_legacy_const_generics(2, 3, 4)]
41158pub fn _mm_getmant_round_sd<
41159    const NORM: _MM_MANTISSA_NORM_ENUM,
41160    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41161    const SAE: i32,
41162>(
41163    a: __m128d,
41164    b: __m128d,
41165) -> __m128d {
41166    unsafe {
41167        static_assert_uimm_bits!(NORM, 4);
41168        static_assert_uimm_bits!(SIGN, 2);
41169        static_assert_mantissas_sae!(SAE);
41170        let a = a.as_f64x2();
41171        let b = b.as_f64x2();
41172        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
41173        transmute(r)
41174    }
41175}
41176
41177/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41178/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41179///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41180///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41181///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41182///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41183/// The sign is determined by sc which can take the following values:\
41184///    _MM_MANT_SIGN_src     // sign = sign(src)\
41185///    _MM_MANT_SIGN_zero    // sign = 0\
41186///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41187/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41188///
41189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
41190#[inline]
41191#[target_feature(enable = "avx512f")]
41192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41193#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41194#[rustc_legacy_const_generics(4, 5, 6)]
41195pub fn _mm_mask_getmant_round_sd<
41196    const NORM: _MM_MANTISSA_NORM_ENUM,
41197    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41198    const SAE: i32,
41199>(
41200    src: __m128d,
41201    k: __mmask8,
41202    a: __m128d,
41203    b: __m128d,
41204) -> __m128d {
41205    unsafe {
41206        static_assert_uimm_bits!(NORM, 4);
41207        static_assert_uimm_bits!(SIGN, 2);
41208        static_assert_mantissas_sae!(SAE);
41209        let a = a.as_f64x2();
41210        let b = b.as_f64x2();
41211        let src = src.as_f64x2();
41212        let r = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
41213        transmute(r)
41214    }
41215}
41216
41217/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
41218/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
41219///    _MM_MANT_NORM_1_2     // interval [1, 2)\
41220///    _MM_MANT_NORM_p5_2    // interval [0.5, 2)\
41221///    _MM_MANT_NORM_p5_1    // interval [0.5, 1)\
41222///    _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
41223/// The sign is determined by sc which can take the following values:\
41224///    _MM_MANT_SIGN_src     // sign = sign(src)\
41225///    _MM_MANT_SIGN_zero    // sign = 0\
41226///    _MM_MANT_SIGN_nan     // dst = NaN if sign(src) = 1\
41227/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41228///
41229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
41230#[inline]
41231#[target_feature(enable = "avx512f")]
41232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41233#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
41234#[rustc_legacy_const_generics(3, 4, 5)]
41235pub fn _mm_maskz_getmant_round_sd<
41236    const NORM: _MM_MANTISSA_NORM_ENUM,
41237    const SIGN: _MM_MANTISSA_SIGN_ENUM,
41238    const SAE: i32,
41239>(
41240    k: __mmask8,
41241    a: __m128d,
41242    b: __m128d,
41243) -> __m128d {
41244    unsafe {
41245        static_assert_uimm_bits!(NORM, 4);
41246        static_assert_uimm_bits!(SIGN, 2);
41247        static_assert_mantissas_sae!(SAE);
41248        let a = a.as_f64x2();
41249        let b = b.as_f64x2();
41250        let r = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
41251        transmute(r)
41252    }
41253}
41254
41255/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41256/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41257/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41258/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41259/// * [`_MM_FROUND_TO_POS_INF`] : round up
41260/// * [`_MM_FROUND_TO_ZERO`] : truncate
41261/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41262///
41263/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
41264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
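///
/// A minimal sketch (illustrative values; IMM8 == 0 keeps zero fraction bits
/// and rounds to nearest-even, so 2.5 becomes 2.0):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// # use core::arch::x86_64::*;
/// if is_x86_feature_detected!("avx512f") {
///     // SAFETY: AVX-512F support was verified at runtime.
///     unsafe {
///         let a = _mm_set_ss(0.0);
///         let b = _mm_set_ss(2.5);
///         let r = _mm_roundscale_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
///         assert_eq!(_mm_cvtss_f32(r), 2.0);
///     }
/// }
/// # }
/// ```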
41265#[inline]
41266#[target_feature(enable = "avx512f")]
41267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41268#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41269#[rustc_legacy_const_generics(2, 3)]
41270pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
41271    unsafe {
41272        static_assert_uimm_bits!(IMM8, 8);
41273        static_assert_mantissas_sae!(SAE);
41274        let a = a.as_f32x4();
41275        let b = b.as_f32x4();
41276        let r = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE); // scalar op: only mask bit 0 is consulted, so all-ones equals 0b1
41277        transmute(r)
41278    }
41279}
41280
41281/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41282/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41283/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41284/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41285/// * [`_MM_FROUND_TO_POS_INF`] : round up
41286/// * [`_MM_FROUND_TO_ZERO`] : truncate
41287/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41288///
41289/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
41290/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
41291#[inline]
41292#[target_feature(enable = "avx512f")]
41293#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41294#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41295#[rustc_legacy_const_generics(4, 5)]
41296pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
41297    src: __m128,
41298    k: __mmask8,
41299    a: __m128,
41300    b: __m128,
41301) -> __m128 {
41302    unsafe {
41303        static_assert_uimm_bits!(IMM8, 8);
41304        static_assert_mantissas_sae!(SAE);
41305        let a = a.as_f32x4();
41306        let b = b.as_f32x4();
41307        let src = src.as_f32x4();
41308        let r = vrndscaless(a, b, src, k, IMM8, SAE);
41309        transmute(r)
41310    }
41311}
41312
41313/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41314/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41315/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41316/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41317/// * [`_MM_FROUND_TO_POS_INF`] : round up
41318/// * [`_MM_FROUND_TO_ZERO`] : truncate
41319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41320///
41321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
41322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
41323#[inline]
41324#[target_feature(enable = "avx512f")]
41325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41326#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
41327#[rustc_legacy_const_generics(3, 4)]
41328pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
41329    k: __mmask8,
41330    a: __m128,
41331    b: __m128,
41332) -> __m128 {
41333    unsafe {
41334        static_assert_uimm_bits!(IMM8, 8);
41335        static_assert_mantissas_sae!(SAE);
41336        let a = a.as_f32x4();
41337        let b = b.as_f32x4();
41338        let r = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
41339        transmute(r)
41340    }
41341}
41342
41343/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41344/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41345/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41346/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41347/// * [`_MM_FROUND_TO_POS_INF`] : round up
41348/// * [`_MM_FROUND_TO_ZERO`] : truncate
41349/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41350///
41351/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
41352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
41353#[inline]
41354#[target_feature(enable = "avx512f")]
41355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41356#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41357#[rustc_legacy_const_generics(2, 3)]
41358pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
41359    unsafe {
41360        static_assert_uimm_bits!(IMM8, 8);
41361        static_assert_mantissas_sae!(SAE);
41362        let a = a.as_f64x2();
41363        let b = b.as_f64x2();
41364        let r = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE); // scalar op: only mask bit 0 is consulted, so all-ones equals 0b1
41365        transmute(r)
41366    }
41367}
41368
41369/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41370/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41371/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41372/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41373/// * [`_MM_FROUND_TO_POS_INF`] : round up
41374/// * [`_MM_FROUND_TO_ZERO`] : truncate
41375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41376///
41377/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
41378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
41379#[inline]
41380#[target_feature(enable = "avx512f")]
41381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41382#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41383#[rustc_legacy_const_generics(4, 5)]
41384pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
41385    src: __m128d,
41386    k: __mmask8,
41387    a: __m128d,
41388    b: __m128d,
41389) -> __m128d {
41390    unsafe {
41391        static_assert_uimm_bits!(IMM8, 8);
41392        static_assert_mantissas_sae!(SAE);
41393        let a = a.as_f64x2();
41394        let b = b.as_f64x2();
41395        let src = src.as_f64x2();
41396        let r = vrndscalesd(a, b, src, k, IMM8, SAE);
41397        transmute(r)
41398    }
41399}
41400
41401/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41402/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
41403/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
41404/// * [`_MM_FROUND_TO_NEG_INF`] : round down
41405/// * [`_MM_FROUND_TO_POS_INF`] : round up
41406/// * [`_MM_FROUND_TO_ZERO`] : truncate
41407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41408///
41409/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
41411#[inline]
41412#[target_feature(enable = "avx512f")]
41413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41414#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
41415#[rustc_legacy_const_generics(3, 4)]
41416pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
41417    k: __mmask8,
41418    a: __m128d,
41419    b: __m128d,
41420) -> __m128d {
41421    unsafe {
41422        static_assert_uimm_bits!(IMM8, 8);
41423        static_assert_mantissas_sae!(SAE);
41424        let a = a.as_f64x2();
41425        let b = b.as_f64x2();
41426        let r = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
41427        transmute(r)
41428    }
41429}
41430
41431/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41432///
41433/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41434/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41435/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41436/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41437/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41438/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41439///
41440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
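///
/// A minimal sketch of intended usage (values are illustrative), assuming
/// `avx512f` support has been verified at runtime, e.g. via
/// `is_x86_feature_detected!("avx512f")`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set_ss(3.0);
///     let b = _mm_set_ss(2.0);
///     // scalef computes a * 2^floor(b) in the lower lane:
///     // 3.0 * 2^2 = 12.0, rounded to nearest with exceptions suppressed.
///     let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     assert_eq!(_mm_cvtss_f32(r), 12.0);
/// }
/// ```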
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
    src: __m128,
    k: __mmask8,
    a: __m128,
    b: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let src = src.as_f32x4();
        let r = vscalefss(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        let r = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
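///
/// A minimal sketch of the writemask behaviour (values are illustrative),
/// assuming `avx512f` support has been verified at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm_set_sd(-1.0);
///     let a = _mm_set_sd(3.0);
///     let b = _mm_set_sd(2.0);
///     // Mask bit 0 is clear, so the lower lane is taken from `src` instead
///     // of the computed 3.0 * 2^2.
///     let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, 0, a, b);
///     assert_eq!(_mm_cvtsd_f64(r), -1.0);
/// }
/// ```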
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let src = src.as_f64x2();
        let r = vscalefsd(a, b, src, k, ROUNDING);
        transmute(r)
    }
}

/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        let r = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
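///
/// A minimal sketch of intended usage (values are illustrative), assuming
/// `avx512f` support has been verified at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // Lower lane: (2.0 * 3.0) + 1.0 = 7.0, computed as a single fused
///     // multiply-add with round-to-nearest and exceptions suppressed.
///     let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 7.0);
/// }
/// ```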
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f32 = simd_extract!(a, 0);
        let extractb: f32 = simd_extract!(b, 0);
        let extractc: f32 = simd_extract!(c, 0);
        let r = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, r)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
        }
        simd_insert!(c, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f64 = simd_extract!(a, 0);
        let extractb: f64 = simd_extract!(b, 0);
        let extractc: f64 = simd_extract!(c, 0);
        let fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
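///
/// A minimal sketch of the writemask behaviour (values are illustrative),
/// assuming `avx512f` support has been verified at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set_sd(2.0);
///     let b = _mm_set_sd(3.0);
///     let c = _mm_set_sd(1.0);
///     // Mask bit 0 is set, so the lower lane is (2.0 * 3.0) + 1.0 = 7.0;
///     // with a mask of 0 it would be copied from `a` instead.
///     let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 1, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), 7.0);
/// }
/// ```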
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmadd: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
        }
        simd_insert!(c, 0, fmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
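///
/// A minimal sketch of intended usage (values are illustrative), assuming
/// `avx512f` support has been verified at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // Lower lane: (2.0 * 3.0) - 1.0 = 5.0, with round-to-nearest and
///     // exceptions suppressed.
///     let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 5.0);
/// }
/// ```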
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f32 = simd_extract!(a, 0);
        let extractb: f32 = simd_extract!(b, 0);
        let extractc: f32 = simd_extract!(c, 0);
        let extractc = -extractc;
        let fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
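///
/// A minimal sketch of the zeromask behaviour (values are illustrative),
/// assuming `avx512f` support has been verified at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // Mask bit 0 is clear, so the lower lane is zeroed instead of the
///     // computed (2.0 * 3.0) - 1.0.
///     let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 0.0);
/// }
/// ```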
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            let extractc = -fmsub;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(c, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f64 = simd_extract!(a, 0);
        let extractb: f64 = simd_extract!(b, 0);
        let extractc: f64 = simd_extract!(c, 0);
        let extractc = -extractc;
        let fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            let extractc = -extractc;
            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fmsub)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fmsub: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            let extractc = -fmsub;
            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(c, 0, fmsub)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
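///
/// A minimal sketch of intended usage (values are illustrative), assuming
/// `avx512f` support has been verified at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // Lower lane: -(2.0 * 3.0) + 1.0 = -5.0, with round-to-nearest and
///     // exceptions suppressed.
///     let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), -5.0);
/// }
/// ```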
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f32 = simd_extract!(a, 0);
        let extracta = -extracta;
        let extractb: f32 = simd_extract!(b, 0);
        let extractc: f32 = simd_extract!(c, 0);
        let fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
    a: __m128,
    k: __mmask8,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f32 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmadd;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128,
    b: __m128,
    c: __m128,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            let extractc: f32 = simd_extract!(c, 0);
            fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
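///
/// A minimal sketch of the mask3 behaviour, where the result and the
/// unselected lanes both land in `c` (values are illustrative), assuming
/// `avx512f` support has been verified at runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // Mask bit 0 is set, so the lower lane is -(2.0 * 3.0) + 1.0 = -5.0;
///     // the upper lanes are copied from `c`.
///     let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, 1);
///     assert_eq!(_mm_cvtss_f32(r), -5.0);
/// }
/// ```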
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
    a: __m128,
    b: __m128,
    c: __m128,
    k: __mmask8,
) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f32 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f32 = simd_extract!(b, 0);
            fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
        }
        simd_insert!(c, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(3)]
pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let extracta: f64 = simd_extract!(a, 0);
        let extracta = -extracta;
        let extractb: f64 = simd_extract!(b, 0);
        let extractc: f64 = simd_extract!(c, 0);
        let fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
    a: __m128d,
    k: __mmask8,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f64 = simd_extract!(a, 0);
        if (k & 0b00000001) != 0 {
            let extracta = -fnmadd;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
    k: __mmask8,
    a: __m128d,
    b: __m128d,
    c: __m128d,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            let extractc: f64 = simd_extract!(c, 0);
            fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
        }
        simd_insert!(a, 0, fnmadd)
    }
}

/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
#[rustc_legacy_const_generics(4)]
pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
    a: __m128d,
    b: __m128d,
    c: __m128d,
    k: __mmask8,
) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let mut fnmadd: f64 = simd_extract!(c, 0);
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extracta = -extracta;
            let extractb: f64 = simd_extract!(b, 0);
            fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
        }
        simd_insert!(c, 0, fnmadd)
    }
}

/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
42378#[inline]
42379#[target_feature(enable = "avx512f")]
42380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42381#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42382#[rustc_legacy_const_generics(3)]
42383pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
42384    unsafe {
42385        static_assert_rounding!(ROUNDING);
42386        let extracta: f32 = simd_extract!(a, 0);
42387        let extracta = -extracta;
42388        let extractb: f32 = simd_extract!(b, 0);
42389        let extractc: f32 = simd_extract!(c, 0);
42390        let extractc = -extractc;
42391        let fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42392        simd_insert!(a, 0, fnmsub)
42393    }
42394}
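
// Editor's sketch (not part of the upstream source; the helper name is
// hypothetical): the scalar math behind `_mm_fnmsub_round_ss` is
// -(a[0] * b[0]) - c[0], computed fused and rounded per ROUNDING, with
// elements 1..3 taken from `a`. Assumes an AVX-512F target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn fnmsub_round_ss_sketch() {
    let a = _mm_set_ps(7.0, 6.0, 5.0, 2.0); // element 0 = 2.0
    let b = _mm_set_ps(0.0, 0.0, 0.0, 3.0); // element 0 = 3.0
    let c = _mm_set_ps(0.0, 0.0, 0.0, 1.0); // element 0 = 1.0
    let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
    // Element 0 = -(2.0 * 3.0) - 1.0 = -7.0; elements 1..3 are 5.0, 6.0, 7.0 from `a`.
    assert_eq!(_mm_cvtss_f32(r), -7.0);
}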
42395
42396/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42397///
42398/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42399/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42400/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42401/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42402/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42403/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42404///
42405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
42406#[inline]
42407#[target_feature(enable = "avx512f")]
42408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42409#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42410#[rustc_legacy_const_generics(4)]
42411pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
42412    a: __m128,
42413    k: __mmask8,
42414    b: __m128,
42415    c: __m128,
42416) -> __m128 {
42417    unsafe {
42418        static_assert_rounding!(ROUNDING);
42419        let mut fnmsub: f32 = simd_extract!(a, 0);
42420        if (k & 0b00000001) != 0 {
42421            let extracta = -fnmsub;
42422            let extractb: f32 = simd_extract!(b, 0);
42423            let extractc: f32 = simd_extract!(c, 0);
42424            let extractc = -extractc;
42425            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42426        }
42427        simd_insert!(a, 0, fnmsub)
42428    }
42429}
42430
42431/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
42432///
42433/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42434/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42435/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42436/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42437/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42438/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42439///
42440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
42441#[inline]
42442#[target_feature(enable = "avx512f")]
42443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42444#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42445#[rustc_legacy_const_generics(4)]
42446pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
42447    k: __mmask8,
42448    a: __m128,
42449    b: __m128,
42450    c: __m128,
42451) -> __m128 {
42452    unsafe {
42453        static_assert_rounding!(ROUNDING);
42454        let mut fnmsub: f32 = 0.;
42455        if (k & 0b00000001) != 0 {
42456            let extracta: f32 = simd_extract!(a, 0);
42457            let extracta = -extracta;
42458            let extractb: f32 = simd_extract!(b, 0);
42459            let extractc: f32 = simd_extract!(c, 0);
42460            let extractc = -extractc;
42461            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42462        }
42463        simd_insert!(a, 0, fnmsub)
42464    }
42465}
42466
42467/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
42468///
42469/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42470/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42471/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42472/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42473/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42474/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42475///
42476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
42477#[inline]
42478#[target_feature(enable = "avx512f")]
42479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42480#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42481#[rustc_legacy_const_generics(4)]
42482pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
42483    a: __m128,
42484    b: __m128,
42485    c: __m128,
42486    k: __mmask8,
42487) -> __m128 {
42488    unsafe {
42489        static_assert_rounding!(ROUNDING);
42490        let mut fnmsub: f32 = simd_extract!(c, 0);
42491        if (k & 0b00000001) != 0 {
42492            let extracta: f32 = simd_extract!(a, 0);
42493            let extracta = -extracta;
42494            let extractb: f32 = simd_extract!(b, 0);
42495            let extractc = -fnmsub;
42496            fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
42497        }
42498        simd_insert!(c, 0, fnmsub)
42499    }
42500}
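
// Editor's sketch (not part of the upstream source; the helper name is
// hypothetical): the `mask3` variants merge into `c` rather than `a`: element
// 0 is kept from `c` when mask bit 0 is clear, and the upper elements always
// come from `c`. Assumes an AVX-512F target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn mask3_fnmsub_round_ss_sketch() {
    let a = _mm_set_ps(0.0, 0.0, 0.0, 2.0);
    let b = _mm_set_ps(0.0, 0.0, 0.0, 3.0);
    let c = _mm_set_ps(40.0, 30.0, 20.0, 1.0);
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    // Mask bit 0 clear: element 0 stays 1.0 (copied from `c`).
    let kept = _mm_mask3_fnmsub_round_ss::<R>(a, b, c, 0b0);
    assert_eq!(_mm_cvtss_f32(kept), 1.0);
    // Mask bit 0 set: element 0 = -(2.0 * 3.0) - 1.0 = -7.0; upper elements from `c`.
    let computed = _mm_mask3_fnmsub_round_ss::<R>(a, b, c, 0b1);
    assert_eq!(_mm_cvtss_f32(computed), -7.0);
}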
42501
42502/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
42503///
42504/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42505/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42506/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42507/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42508/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42509/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42510///
42511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
42512#[inline]
42513#[target_feature(enable = "avx512f")]
42514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42515#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42516#[rustc_legacy_const_generics(3)]
42517pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
42518    unsafe {
42519        static_assert_rounding!(ROUNDING);
42520        let extracta: f64 = simd_extract!(a, 0);
42521        let extracta = -extracta;
42522        let extractb: f64 = simd_extract!(b, 0);
42523        let extractc: f64 = simd_extract!(c, 0);
42524        let extractc = -extractc;
42525        let fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42526        simd_insert!(a, 0, fnmsub)
42527    }
42528}
42529
42530/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42531///
42532/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42533/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42534/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42535/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42536/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42537/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42538///
42539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
42540#[inline]
42541#[target_feature(enable = "avx512f")]
42542#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42543#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42544#[rustc_legacy_const_generics(4)]
42545pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
42546    a: __m128d,
42547    k: __mmask8,
42548    b: __m128d,
42549    c: __m128d,
42550) -> __m128d {
42551    unsafe {
42552        static_assert_rounding!(ROUNDING);
42553        let mut fnmsub: f64 = simd_extract!(a, 0);
42554        if (k & 0b00000001) != 0 {
42555            let extracta = -fnmsub;
42556            let extractb: f64 = simd_extract!(b, 0);
42557            let extractc: f64 = simd_extract!(c, 0);
42558            let extractc = -extractc;
42559            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42560        }
42561        simd_insert!(a, 0, fnmsub)
42562    }
42563}
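
// Editor's sketch (not part of the upstream source; the helper name is
// hypothetical): with the plain `mask` variant the fallback value comes from
// `a` (the destination operand), unlike the `mask3` form where it comes from
// `c`. Assumes an AVX-512F target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn mask_fnmsub_round_sd_sketch() {
    let a = _mm_set_pd(9.0, 2.0);
    let b = _mm_set_pd(0.0, 3.0);
    let c = _mm_set_pd(0.0, 1.0);
    const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
    // Mask bit 0 clear: element 0 keeps `a`'s value, 2.0.
    let kept = _mm_mask_fnmsub_round_sd::<R>(a, 0b0, b, c);
    assert_eq!(_mm_cvtsd_f64(kept), 2.0);
    // Mask bit 0 set: element 0 = -(2.0 * 3.0) - 1.0 = -7.0.
    let computed = _mm_mask_fnmsub_round_sd::<R>(a, 0b1, b, c);
    assert_eq!(_mm_cvtsd_f64(computed), -7.0);
}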
42564
42565/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
42566///
42567/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42568/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42569/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42570/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42571/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42572/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42573///
42574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
42575#[inline]
42576#[target_feature(enable = "avx512f")]
42577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42578#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42579#[rustc_legacy_const_generics(4)]
42580pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
42581    k: __mmask8,
42582    a: __m128d,
42583    b: __m128d,
42584    c: __m128d,
42585) -> __m128d {
42586    unsafe {
42587        static_assert_rounding!(ROUNDING);
42588        let mut fnmsub: f64 = 0.;
42589        if (k & 0b00000001) != 0 {
42590            let extracta: f64 = simd_extract!(a, 0);
42591            let extracta = -extracta;
42592            let extractb: f64 = simd_extract!(b, 0);
42593            let extractc: f64 = simd_extract!(c, 0);
42594            let extractc = -extractc;
42595            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42596        }
42597        simd_insert!(a, 0, fnmsub)
42598    }
42599}
42600
42601/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
42602///
42603/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
42604/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
42605/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
42606/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
42607/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
42608/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
42609///
42610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
42611#[inline]
42612#[target_feature(enable = "avx512f")]
42613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42614#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
42615#[rustc_legacy_const_generics(4)]
42616pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
42617    a: __m128d,
42618    b: __m128d,
42619    c: __m128d,
42620    k: __mmask8,
42621) -> __m128d {
42622    unsafe {
42623        static_assert_rounding!(ROUNDING);
42624        let mut fnmsub: f64 = simd_extract!(c, 0);
42625        if (k & 0b00000001) != 0 {
42626            let extracta: f64 = simd_extract!(a, 0);
42627            let extracta = -extracta;
42628            let extractb: f64 = simd_extract!(b, 0);
42629            let extractc = -fnmsub;
42630            fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
42631        }
42632        simd_insert!(c, 0, fnmsub)
42633    }
42634}
42635
42636/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42637///
42638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
42639#[inline]
42640#[target_feature(enable = "avx512f")]
42641#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42642#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42643#[rustc_legacy_const_generics(3)]
42644pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
42645    unsafe {
42646        static_assert_uimm_bits!(IMM8, 8);
42647        let a = a.as_f32x4();
42648        let b = b.as_f32x4();
42649        let c = c.as_i32x4();
42650        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
42651        let fixupimm: f32 = simd_extract!(r, 0);
42652        let r = simd_insert!(a, 0, fixupimm);
42653        transmute(r)
42654    }
42655}
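
// Editor's sketch (not part of the upstream source; the helper name and table
// values are hypothetical and follow the editor's reading of Intel's
// VFIXUPIMMSS operation): each input class of `b`'s low element selects a
// 4-bit response code from the table in `c`. An all-zero table means "keep
// the destination", and response code 0b1010 substitutes +1.0. Assumes an
// AVX-512F target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn fixupimm_ss_sketch() {
    let a = _mm_set_ps(0.0, 0.0, 0.0, 5.0);
    let b = _mm_set_ps(0.0, 0.0, 0.0, 0.0); // low element classifies as ZERO_VALUE
    // All-zero table: every class maps to "preserve dest", so element 0 stays 5.0.
    let keep = _mm_fixupimm_ss::<0>(a, b, _mm_setzero_si128());
    assert_eq!(_mm_cvtss_f32(keep), 5.0);
    // ZERO_VALUE is token 2; put response 0b1010 (+1.0) in bits [11:8] of the table.
    let table = _mm_set_epi32(0, 0, 0, 0xA << 8);
    let one = _mm_fixupimm_ss::<0>(a, b, table);
    assert_eq!(_mm_cvtss_f32(one), 1.0);
}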
42656
42657/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42658///
42659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
42660#[inline]
42661#[target_feature(enable = "avx512f")]
42662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42663#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42664#[rustc_legacy_const_generics(4)]
42665pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
42666    a: __m128,
42667    k: __mmask8,
42668    b: __m128,
42669    c: __m128i,
42670) -> __m128 {
42671    unsafe {
42672        static_assert_uimm_bits!(IMM8, 8);
42673        let a = a.as_f32x4();
42674        let b = b.as_f32x4();
42675        let c = c.as_i32x4();
42676        let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
42677        let fixupimm: f32 = simd_extract!(fixupimm, 0);
42678        let r = simd_insert!(a, 0, fixupimm);
42679        transmute(r)
42680    }
42681}
42682
42683/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
42684///
42685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
42686#[inline]
42687#[target_feature(enable = "avx512f")]
42688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42689#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
42690#[rustc_legacy_const_generics(4)]
42691pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
42692    k: __mmask8,
42693    a: __m128,
42694    b: __m128,
42695    c: __m128i,
42696) -> __m128 {
42697    unsafe {
42698        static_assert_uimm_bits!(IMM8, 8);
42699        let a = a.as_f32x4();
42700        let b = b.as_f32x4();
42701        let c = c.as_i32x4();
42702        let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
42703        let fixupimm: f32 = simd_extract!(fixupimm, 0);
42704        let r = simd_insert!(a, 0, fixupimm);
42705        transmute(r)
42706    }
42707}
42708
42709/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42710///
42711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
42712#[inline]
42713#[target_feature(enable = "avx512f")]
42714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42715#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42716#[rustc_legacy_const_generics(3)]
42717pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
42718    unsafe {
42719        static_assert_uimm_bits!(IMM8, 8);
42720        let a = a.as_f64x2();
42721        let b = b.as_f64x2();
42722        let c = c.as_i64x2();
42723        let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
42724        let fixupimm: f64 = simd_extract!(fixupimm, 0);
42725        let r = simd_insert!(a, 0, fixupimm);
42726        transmute(r)
42727    }
42728}
42729
42730/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42731///
42732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
42733#[inline]
42734#[target_feature(enable = "avx512f")]
42735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42736#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42737#[rustc_legacy_const_generics(4)]
42738pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
42739    a: __m128d,
42740    k: __mmask8,
42741    b: __m128d,
42742    c: __m128i,
42743) -> __m128d {
42744    unsafe {
42745        static_assert_uimm_bits!(IMM8, 8);
42746        let a = a.as_f64x2();
42747        let b = b.as_f64x2();
42748        let c = c.as_i64x2();
42749        let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
42750        let fixupimm: f64 = simd_extract!(fixupimm, 0);
42751        let r = simd_insert!(a, 0, fixupimm);
42752        transmute(r)
42753    }
42754}
42755
42756/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
42757///
42758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
42759#[inline]
42760#[target_feature(enable = "avx512f")]
42761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42762#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
42763#[rustc_legacy_const_generics(4)]
42764pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
42765    k: __mmask8,
42766    a: __m128d,
42767    b: __m128d,
42768    c: __m128i,
42769) -> __m128d {
42770    unsafe {
42771        static_assert_uimm_bits!(IMM8, 8);
42772        let a = a.as_f64x2();
42773        let b = b.as_f64x2();
42774        let c = c.as_i64x2();
42775        let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
42776        let fixupimm: f64 = simd_extract!(fixupimm, 0);
42777        let r = simd_insert!(a, 0, fixupimm);
42778        transmute(r)
42779    }
42780}
42781
42782/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42783/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42784///
42785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
42786#[inline]
42787#[target_feature(enable = "avx512f")]
42788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42789#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42790#[rustc_legacy_const_generics(3, 4)]
42791pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42792    a: __m128,
42793    b: __m128,
42794    c: __m128i,
42795) -> __m128 {
42796    unsafe {
42797        static_assert_uimm_bits!(IMM8, 8);
42798        static_assert_mantissas_sae!(SAE);
42799        let a = a.as_f32x4();
42800        let b = b.as_f32x4();
42801        let c = c.as_i32x4();
42802        let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
42803        let fixupimm: f32 = simd_extract!(r, 0);
42804        let r = simd_insert!(a, 0, fixupimm);
42805        transmute(r)
42806    }
42807}
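
// Editor's note (not part of the upstream source): the SAE parameter of the
// `_round` fixupimm variants is validated by `static_assert_mantissas_sae!`,
// so only `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC` compile. A
// hypothetical sketch, assuming an AVX-512F target:
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn fixupimm_round_ss_sketch() {
    let a = _mm_set_ss(5.0);
    let b = _mm_set_ss(0.0);
    // Suppress exception reporting for the fixup with _MM_FROUND_NO_EXC.
    let r = _mm_fixupimm_round_ss::<0, _MM_FROUND_NO_EXC>(a, b, _mm_setzero_si128());
    assert_eq!(_mm_cvtss_f32(r), 5.0);
    // `_mm_fixupimm_round_ss::<0, _MM_FROUND_TO_ZERO>(...)` would fail the
    // static assertion at compile time.
}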
42808
42809/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42810/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42811///
42812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
42813#[inline]
42814#[target_feature(enable = "avx512f")]
42815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42816#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42817#[rustc_legacy_const_generics(4, 5)]
42818pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42819    a: __m128,
42820    k: __mmask8,
42821    b: __m128,
42822    c: __m128i,
42823) -> __m128 {
42824    unsafe {
42825        static_assert_uimm_bits!(IMM8, 8);
42826        static_assert_mantissas_sae!(SAE);
42827        let a = a.as_f32x4();
42828        let b = b.as_f32x4();
42829        let c = c.as_i32x4();
42830        let r = vfixupimmss(a, b, c, IMM8, k, SAE);
42831        let fixupimm: f32 = simd_extract!(r, 0);
42832        let r = simd_insert!(a, 0, fixupimm);
42833        transmute(r)
42834    }
42835}
42836
42837/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
42838/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42839///
42840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
42841#[inline]
42842#[target_feature(enable = "avx512f")]
42843#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42844#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
42845#[rustc_legacy_const_generics(4, 5)]
42846pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
42847    k: __mmask8,
42848    a: __m128,
42849    b: __m128,
42850    c: __m128i,
42851) -> __m128 {
42852    unsafe {
42853        static_assert_uimm_bits!(IMM8, 8);
42854        static_assert_mantissas_sae!(SAE);
42855        let a = a.as_f32x4();
42856        let b = b.as_f32x4();
42857        let c = c.as_i32x4();
42858        let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
42859        let fixupimm: f32 = simd_extract!(r, 0);
42860        let r = simd_insert!(a, 0, fixupimm);
42861        transmute(r)
42862    }
42863}
42864
42865/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42866/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42867///
42868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
42869#[inline]
42870#[target_feature(enable = "avx512f")]
42871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42872#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42873#[rustc_legacy_const_generics(3, 4)]
42874pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42875    a: __m128d,
42876    b: __m128d,
42877    c: __m128i,
42878) -> __m128d {
42879    unsafe {
42880        static_assert_uimm_bits!(IMM8, 8);
42881        static_assert_mantissas_sae!(SAE);
42882        let a = a.as_f64x2();
42883        let b = b.as_f64x2();
42884        let c = c.as_i64x2();
42885        let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
42886        let fixupimm: f64 = simd_extract!(r, 0);
42887        let r = simd_insert!(a, 0, fixupimm);
42888        transmute(r)
42889    }
42890}
42891
42892/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42893/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42894///
42895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
42896#[inline]
42897#[target_feature(enable = "avx512f")]
42898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42899#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42900#[rustc_legacy_const_generics(4, 5)]
42901pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42902    a: __m128d,
42903    k: __mmask8,
42904    b: __m128d,
42905    c: __m128i,
42906) -> __m128d {
42907    unsafe {
42908        static_assert_uimm_bits!(IMM8, 8);
42909        static_assert_mantissas_sae!(SAE);
42910        let a = a.as_f64x2();
42911        let b = b.as_f64x2();
42912        let c = c.as_i64x2();
42913        let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
42914        let fixupimm: f64 = simd_extract!(r, 0);
42915        let r = simd_insert!(a, 0, fixupimm);
42916        transmute(r)
42917    }
42918}
42919
42920/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
42921/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
42922///
42923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
42924#[inline]
42925#[target_feature(enable = "avx512f")]
42926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42927#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
42928#[rustc_legacy_const_generics(4, 5)]
42929pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
42930    k: __mmask8,
42931    a: __m128d,
42932    b: __m128d,
42933    c: __m128i,
42934) -> __m128d {
42935    unsafe {
42936        static_assert_uimm_bits!(IMM8, 8);
42937        static_assert_mantissas_sae!(SAE);
42938        let a = a.as_f64x2();
42939        let b = b.as_f64x2();
42940        let c = c.as_i64x2();
42941        let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
42942        let fixupimm: f64 = simd_extract!(r, 0);
42943        let r = simd_insert!(a, 0, fixupimm);
42944        transmute(r)
42945    }
42946}
42947
42948/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
42949///
42950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
42951#[inline]
42952#[target_feature(enable = "avx512f")]
42953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42954#[cfg_attr(test, assert_instr(vcvtss2sd))]
42955pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
42956    unsafe {
42957        transmute(vcvtss2sd(
42958            a.as_f64x2(),
42959            b.as_f32x4(),
42960            src.as_f64x2(),
42961            k,
42962            _MM_FROUND_CUR_DIRECTION,
42963        ))
42964    }
42965}
42966
42967/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
42968///
42969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
42970#[inline]
42971#[target_feature(enable = "avx512f")]
42972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42973#[cfg_attr(test, assert_instr(vcvtss2sd))]
42974pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
42975    unsafe {
42976        transmute(vcvtss2sd(
42977            a.as_f64x2(),
42978            b.as_f32x4(),
42979            f64x2::ZERO,
42980            k,
42981            _MM_FROUND_CUR_DIRECTION,
42982        ))
42983    }
42984}
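
// Editor's sketch (not part of the upstream source; the helper name is
// hypothetical): scalar f32 -> f64 widening under a writemask, with the
// zeromask variant differing only in falling back to 0.0 instead of `src`.
// Assumes an AVX-512F target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn mask_cvtss_sd_sketch() {
    let src = _mm_set_pd(0.0, -1.0);
    let a = _mm_set_pd(8.0, 2.0);
    let b = _mm_set_ps(0.0, 0.0, 0.0, 0.5);
    // Mask bit 0 set: element 0 = 0.5f32 widened to f64; element 1 copied from `a`.
    let r = _mm_mask_cvtss_sd(src, 0b1, a, b);
    assert_eq!(_mm_cvtsd_f64(r), 0.5);
    // Mask bit 0 clear: element 0 falls back to `src` (-1.0).
    let fallback = _mm_mask_cvtss_sd(src, 0b0, a, b);
    assert_eq!(_mm_cvtsd_f64(fallback), -1.0);
}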
42985
42986/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
42987///
42988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
42989#[inline]
42990#[target_feature(enable = "avx512f")]
42991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42992#[cfg_attr(test, assert_instr(vcvtsd2ss))]
42993pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
42994    unsafe {
42995        transmute(vcvtsd2ss(
42996            a.as_f32x4(),
42997            b.as_f64x2(),
42998            src.as_f32x4(),
42999            k,
43000            _MM_FROUND_CUR_DIRECTION,
43001        ))
43002    }
43003}
43004
43005/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
43006///
43007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
43008#[inline]
43009#[target_feature(enable = "avx512f")]
43010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43011#[cfg_attr(test, assert_instr(vcvtsd2ss))]
43012pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
43013    unsafe {
43014        transmute(vcvtsd2ss(
43015            a.as_f32x4(),
43016            b.as_f64x2(),
43017            f32x4::ZERO,
43018            k,
43019            _MM_FROUND_CUR_DIRECTION,
43020        ))
43021    }
43022}
43023
43024/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
43025/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43026///
43027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
43028#[inline]
43029#[target_feature(enable = "avx512f")]
43030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43031#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43032#[rustc_legacy_const_generics(2)]
43033pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
43034    unsafe {
43035        static_assert_sae!(SAE);
43036        let a = a.as_f64x2();
43037        let b = b.as_f32x4();
43038        let r = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
43039        transmute(r)
43040    }
43041}
43042
43043/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
43044/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43045///
43046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
43047#[inline]
43048#[target_feature(enable = "avx512f")]
43049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43050#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43051#[rustc_legacy_const_generics(4)]
43052pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
43053    src: __m128d,
43054    k: __mmask8,
43055    a: __m128d,
43056    b: __m128,
43057) -> __m128d {
43058    unsafe {
43059        static_assert_sae!(SAE);
43060        let a = a.as_f64x2();
43061        let b = b.as_f32x4();
43062        let src = src.as_f64x2();
43063        let r = vcvtss2sd(a, b, src, k, SAE);
43064        transmute(r)
43065    }
43066}
43067
43068/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
43069/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
43070///
43071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
43072#[inline]
43073#[target_feature(enable = "avx512f")]
43074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43075#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
43076#[rustc_legacy_const_generics(3)]
43077pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
43078    unsafe {
43079        static_assert_sae!(SAE);
43080        let a = a.as_f64x2();
43081        let b = b.as_f32x4();
43082        let r = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
43083        transmute(r)
43084    }
43085}
43086
43087/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43088/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43089/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43090/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43091/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43092/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43093/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43094///
43095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
43096#[inline]
43097#[target_feature(enable = "avx512f")]
43098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43099#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43100#[rustc_legacy_const_generics(2)]
43101pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
43102    unsafe {
43103        static_assert_rounding!(ROUNDING);
43104        let a = a.as_f32x4();
43105        let b = b.as_f64x2();
43106        let r = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
43107        transmute(r)
43108    }
43109}
43110
43111/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
43112/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43113/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43114/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43115/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43116/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43117/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43118///
43119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
43120#[inline]
43121#[target_feature(enable = "avx512f")]
43122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43123#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43124#[rustc_legacy_const_generics(4)]
43125pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
43126    src: __m128,
43127    k: __mmask8,
43128    a: __m128,
43129    b: __m128d,
43130) -> __m128 {
43131    unsafe {
43132        static_assert_rounding!(ROUNDING);
43133        let a = a.as_f32x4();
43134        let b = b.as_f64x2();
43135        let src = src.as_f32x4();
43136        let r = vcvtsd2ss(a, b, src, k, ROUNDING);
43137        transmute(r)
43138    }
43139}
43140
43141/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
43142/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43143/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43144/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43145/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43146/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43147/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43148///
43149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
43150#[inline]
43151#[target_feature(enable = "avx512f")]
43152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43153#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
43154#[rustc_legacy_const_generics(3)]
43155pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
43156    unsafe {
43157        static_assert_rounding!(ROUNDING);
43158        let a = a.as_f32x4();
43159        let b = b.as_f64x2();
43160        let r = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
43161        transmute(r)
43162    }
43163}
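
// Editor's sketch (not part of the upstream source; the helper name is
// hypothetical): directed rounding is observable when narrowing f64 -> f32,
// since 1.0/3.0 is not representable in f32 and rounding up versus down
// differs by one ULP. Assumes an AVX-512F target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn cvt_roundsd_ss_sketch() {
    let a = _mm_setzero_ps();
    let b = _mm_set_sd(1.0 / 3.0);
    let down = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
    let up = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
    // The two results straddle the exact value; `up` is one ULP above `down`.
    assert!(_mm_cvtss_f32(down) < _mm_cvtss_f32(up));
}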
43164
43165/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43166/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43167/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43168/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43169/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43170/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43171/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43172///
43173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
43174#[inline]
43175#[target_feature(enable = "avx512f")]
43176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43177#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
43178#[rustc_legacy_const_generics(1)]
43179pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
43180    unsafe {
43181        static_assert_rounding!(ROUNDING);
43182        let a = a.as_f32x4();
43183        vcvtss2si(a, ROUNDING)
43184    }
43185}
43186
43187/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43188/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43189/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43190/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43191/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43192/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43193/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43194///
43195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
43196#[inline]
43197#[target_feature(enable = "avx512f")]
43198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43199#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
43200#[rustc_legacy_const_generics(1)]
43201pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
43202    unsafe {
43203        static_assert_rounding!(ROUNDING);
43204        let a = a.as_f32x4();
43205        vcvtss2si(a, ROUNDING)
43206    }
43207}
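
// Editor's sketch (not part of the upstream source; the helper name is
// hypothetical): the explicit rounding mode decides how the fractional part
// of the scalar is resolved. Assumes an AVX-512F target.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn cvt_roundss_si32_sketch() {
    let a = _mm_set_ss(2.7);
    // Round to nearest: 2.7 -> 3.
    let nearest = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(nearest, 3);
    // Truncate toward zero: 2.7 -> 2.
    let truncated = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    assert_eq!(truncated, 2);
}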
43208
43209/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
43210/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43211/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43212/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43213/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43214/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43215/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43216///
43217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
43218#[inline]
43219#[target_feature(enable = "avx512f")]
43220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43221#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
43222#[rustc_legacy_const_generics(1)]
43223pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
43224    unsafe {
43225        static_assert_rounding!(ROUNDING);
43226        let a = a.as_f32x4();
43227        vcvtss2usi(a, ROUNDING)
43228    }
43229}
43230
43231/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
43232///
43233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
43234#[inline]
43235#[target_feature(enable = "avx512f")]
43236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43237#[cfg_attr(test, assert_instr(vcvtss2si))]
43238pub fn _mm_cvtss_i32(a: __m128) -> i32 {
43239    unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43240}
43241
43242/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
43243///
43244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
43245#[inline]
43246#[target_feature(enable = "avx512f")]
43247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43248#[cfg_attr(test, assert_instr(vcvtss2usi))]
43249pub fn _mm_cvtss_u32(a: __m128) -> u32 {
43250    unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
43251}
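
// Editor's sketch (not part of the upstream source; the helper name is
// hypothetical): the non-`round` conversions use the current MXCSR rounding
// mode. Under the default round-to-nearest-even, halfway cases go to the even
// integer. Assumes an AVX-512F target with an unmodified MXCSR.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
fn cvtss_u32_sketch() {
    assert_eq!(_mm_cvtss_u32(_mm_set_ss(7.5)), 8);
    assert_eq!(_mm_cvtss_u32(_mm_set_ss(8.5)), 8); // ties go to even
}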
43252
43253/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43254/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43255/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43256/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43257/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43258/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43259/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43260///
43261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
43262#[inline]
43263#[target_feature(enable = "avx512f")]
43264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43265#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
43266#[rustc_legacy_const_generics(1)]
43267pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
43268    unsafe {
43269        static_assert_rounding!(ROUNDING);
43270        let a = a.as_f64x2();
43271        vcvtsd2si(a, ROUNDING)
43272    }
43273}
43274
43275/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
43276/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43277/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43278/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43279/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43280/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43281/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43282///
43283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
43284#[inline]
43285#[target_feature(enable = "avx512f")]
43286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43287#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
43288#[rustc_legacy_const_generics(1)]
43289pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
43290    unsafe {
43291        static_assert_rounding!(ROUNDING);
43292        let a = a.as_f64x2();
43293        vcvtsd2si(a, ROUNDING)
43294    }
43295}
43296
43297/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
43298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43304///
43305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
43306#[inline]
43307#[target_feature(enable = "avx512f")]
43308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43309#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
43310#[rustc_legacy_const_generics(1)]
43311pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
43312    unsafe {
43313        static_assert_rounding!(ROUNDING);
43314        let a = a.as_f64x2();
43315        vcvtsd2usi(a, ROUNDING)
43316    }
43317}
43318
43319/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
43320///
43321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
43322#[inline]
43323#[target_feature(enable = "avx512f")]
43324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43325#[cfg_attr(test, assert_instr(vcvtsd2si))]
43326pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
43327    unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43328}
43329
43330/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
43331///
43332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
43333#[inline]
43334#[target_feature(enable = "avx512f")]
43335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
43336#[cfg_attr(test, assert_instr(vcvtsd2usi))]
43337pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
43338    unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
43339}
43340
43341/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
43342///
43343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
43344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
43345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
43346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
43347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
43348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
43349///
43350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let r = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
    }
}

/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let r = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
    }
}

/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
#[rustc_legacy_const_generics(2)]
pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x4();
        let r = vcvtusi2ss(a, b, ROUNDING);
        transmute(r)
    }
}

/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
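///
/// A minimal sketch (an illustration, not part of the original docs):
///
/// ```ignore
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
/// // Low lane becomes -7.0; the upper three lanes are copied from `a`.
/// let r = _mm_cvti32_ss(a, -7);
/// ```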
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
    unsafe {
        let b = b as f32;
        simd_insert!(a, 0, b)
    }
}

/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
    unsafe {
        let b = b as f64;
        simd_insert!(a, 0, b)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
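///
/// A hedged sketch (not part of the original docs); truncation always rounds
/// toward zero, and `SAE` only controls exception suppression:
///
/// ```ignore
/// let a = _mm_set_ss(-1.9);
/// // Truncated toward zero.
/// assert_eq!(_mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a), -1);
/// ```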
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        vcvttss2si(a, SAE)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        vcvttss2si(a, SAE)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x4();
        vcvttss2usi(a, SAE)
    }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2si))]
pub fn _mm_cvttss_i32(a: __m128) -> i32 {
    unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
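///
/// A minimal sketch (an illustration, not part of the original docs):
///
/// ```ignore
/// let a = _mm_set_ss(3.9);
/// // Truncated toward zero.
/// assert_eq!(_mm_cvttss_u32(a), 3);
/// ```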
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttss2usi))]
pub fn _mm_cvttss_u32(a: __m128) -> u32 {
    unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        vcvttsd2si(a, SAE)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        vcvttsd2si(a, SAE)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
#[rustc_legacy_const_generics(1)]
pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f64x2();
        vcvttsd2usi(a, SAE)
    }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2si))]
pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
    unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvttsd2usi))]
pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
    unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
}

/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
    unsafe {
        let b = b as f32;
        simd_insert!(a, 0, b)
    }
}

/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
    unsafe {
        let b = b as f64;
        simd_insert!(a, 0, b)
    }
}

/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
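///
/// A hedged sketch (not part of the original docs); the predicate is one of
/// the `_CMP_*` constants and `SAE` is either `_MM_FROUND_CUR_DIRECTION` or
/// `_MM_FROUND_NO_EXC`:
///
/// ```ignore
/// let a = _mm_set_ss(1.0);
/// let b = _mm_set_ss(2.0);
/// // 1.0 < 2.0, so the ordered less-than predicate returns 1.
/// assert_eq!(_mm_comi_round_ss::<_CMP_LT_OS, _MM_FROUND_CUR_DIRECTION>(a, b), 1);
/// ```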
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomiss
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f32x4();
        let b = b.as_f32x4();
        vcomiss(a, b, IMM5, SAE)
    }
}

/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] // should be vcomisd
#[rustc_legacy_const_generics(2, 3)]
pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
    unsafe {
        static_assert_uimm_bits!(IMM5, 5);
        static_assert_mantissas_sae!(SAE);
        let a = a.as_f64x2();
        let b = b.as_f64x2();
        vcomisd(a, b, IMM5, SAE)
    }
}

/// Equal
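///
/// One of the predicates accepted by the integer comparison intrinsics that
/// take an `_MM_CMPINT_ENUM` const parameter; a hedged usage sketch (not part
/// of the original docs):
///
/// ```ignore
/// let a = _mm512_set1_epi32(1);
/// let b = _mm512_set1_epi32(2);
/// // No lane compares equal, so the mask is all zeros.
/// let m: __mmask16 = _mm512_cmp_epi32_mask::<_MM_CMPINT_EQ>(a, b);
/// assert_eq!(m, 0);
/// ```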
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
/// Less-than
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
/// Less-than-or-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
/// False
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
/// Not-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
/// Not less-than
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
/// Not less-than-or-equal
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
/// True
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;

/// interval [1, 2)
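///
/// Selects the normalization interval for the `getmant` intrinsics, used
/// together with an `_MM_MANTISSA_SIGN_ENUM`; a hedged usage sketch (not part
/// of the original docs):
///
/// ```ignore
/// let a = _mm512_set1_ps(-12.0);
/// // Each lane of `m` is -1.5, since -12.0 = -1.5 * 2^3 and the source
/// // sign is kept.
/// let m = _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
/// ```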
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
/// interval [0.5, 2)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
/// interval [0.5, 1)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
/// interval [0.75, 1.5)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;

/// sign = sign(SRC)
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
/// sign = 0
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
/// DEST = NaN if sign(SRC) = 1
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;

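/// Shuffle-control constant for intrinsics taking an `_MM_PERM_ENUM`. In each
/// name the four letters select one 32-bit lane per 128-bit block (`A` =
/// source lane 0 ... `D` = source lane 3), listed from the highest destination
/// lane to the lowest; the same pattern applies to every `_MM_PERM_*` constant
/// below. A hedged usage sketch (not part of the original docs):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// // `_MM_PERM_ABCD` reverses the four lanes within each 128-bit block:
/// // [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]
/// let r = _mm512_shuffle_epi32::<_MM_PERM_ABCD>(a);
/// ```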
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;

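// Raw bindings to the LLVM intrinsics that back the AVX-512 operations above;
// the trailing `rounding`/`sae` arguments carry the embedded rounding or
// suppress-all-exceptions control.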
44217#[allow(improper_ctypes)]
44218unsafe extern "C" {
44219    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
44220    fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
44221    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
44222    fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
44223
44224    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
44225    fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
44226    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
44227    fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;
44228
44229    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
44230    fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; //from clang
44231    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
44232    fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; //from clang
44233
44234    #[link_name = "llvm.x86.avx512.add.ps.512"]
44235    fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44236    #[link_name = "llvm.x86.avx512.add.pd.512"]
44237    fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44238    #[link_name = "llvm.x86.avx512.sub.ps.512"]
44239    fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44240    #[link_name = "llvm.x86.avx512.sub.pd.512"]
44241    fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44242    #[link_name = "llvm.x86.avx512.mul.ps.512"]
44243    fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44244    #[link_name = "llvm.x86.avx512.mul.pd.512"]
44245    fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44246    #[link_name = "llvm.x86.avx512.div.ps.512"]
44247    fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
44248    #[link_name = "llvm.x86.avx512.div.pd.512"]
44249    fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
44250
44251    #[link_name = "llvm.x86.avx512.max.ps.512"]
44252    fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
44253    #[link_name = "llvm.x86.avx512.max.pd.512"]
44254    fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
44255    #[link_name = "llvm.x86.avx512.min.ps.512"]
44256    fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
44257    #[link_name = "llvm.x86.avx512.min.pd.512"]
44258    fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
44259
44260    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
44261    fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
44262
44263    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
44264    fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44265    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
44266    fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44267
44268    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
44269    fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
44270    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
44271    fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44272    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
44273    fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44274
44275    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
44276    fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
44277    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
44278    fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
44279    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
44280    fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
44281
44282    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
44283    fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
44284    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
44285    fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
44286    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
44287    fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
44288
44289    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
44290    fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
44291    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
44292    fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
44293    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
44294    fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
44295
44296    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
44297    fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
44298    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
44299    fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
44300    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
44301    fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
44302
44303    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
44304    fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
44305    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
44306    fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
44307    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
44308    fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
44309
44310    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
44311    fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
44312    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
44313    fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
44314    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
44315    fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
44316
44317    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
44318    fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
44319    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
44320    fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
44321    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
44322    fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
44323
44324    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
44325    fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
44326    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
44327    fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
44328    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
44329    fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
44330
44331    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
44332    fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
44333    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
44334    fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
44335    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
44336    fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
44337
44338    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
44339    fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
44340    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
44341    fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
44342    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
44343    fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
44344
44345    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
44346    fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
44347    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
44348    fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
44349    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
44350    fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
44351
44352    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
44353    fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
44354    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
44355    fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
44356    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
44357    fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
44358
44359    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
44360    fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
44361    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
44362    fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44363    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
44364    fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44365
44366    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
44367    fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
44368    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
44369    fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44370    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
44371    fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44372
44373    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
44374    fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
44375    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
44376    fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
44377    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
44378    fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
44379
44380    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
44381    fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
44382    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
44383    fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
44384    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
44385    fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
44386
    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
    fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;

    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
    fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
    fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
    fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
    fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps"]
    fn vcvtpd2ps128(a: f64x2, src: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
    fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.128"]
    fn vcvtpd2dq128(a: f64x2, src: i32x4, k: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
    fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
    fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
    fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
    fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
    fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
    fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
    fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
    fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
    fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
    fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
    fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
    fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
    fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
    fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
    fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
    fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
    fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
    fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
    fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;

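    // Narrowing integer down-conversions into registers (vpmov*).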
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
    fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
    fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
    fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
    fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
    fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
    fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
    fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
    fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

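    // Narrowing stores to memory: truncating (pmov), signed-saturating (pmovs),
    // and unsigned-saturating (pmovus).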
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
    fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
    fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
    fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
    fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
    fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
    fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
    fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
    fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
    fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
    fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
    fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
    fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
    fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
    fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
    fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
    fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
    fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
    fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
    fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
    fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
    fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
    fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
    fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
    fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
    fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
    fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
    fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
    fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
    fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
    fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
    fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
    fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
    fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
    fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
    fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
    fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
    fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
    fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
    fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
    fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
    fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
    fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
    fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
    fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
    fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
    fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;

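    // Saturating narrowing into registers: signed (vpmovs*), then unsigned (vpmovus*).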
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
    fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
    fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
    fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
    fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
    fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
    fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
    fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
    fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
    fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
    fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
    fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
    fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
    fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
    fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
    fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
    fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
    fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
    fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
    fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
    fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
    fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
    fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
    fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
    fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
    fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
    fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
    fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
    fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
    fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
    fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;

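    // 512-bit gathers; `offsets` holds per-lane indices and `scale` the byte scale (1, 2, 4, or 8).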
    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
    fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.dps.512"]
    fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
    fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.qps.512"]
    fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
    fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
    fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
    fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
    fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;

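    // 512-bit scatters.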
    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
    fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
    fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
    fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
    fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
    fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);

    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
    fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
    fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
    fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);

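    // 128-bit and 256-bit scatters (used by the avx512vl intrinsics).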
    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
    fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
    fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
    fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
    fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
    fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
    fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
    fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
    fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);

    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
    fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
    fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
    fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
    fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
    fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
    fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
    fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
    fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);

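    // 128-bit and 256-bit gathers (used by the avx512vl intrinsics).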
    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
    fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
    fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
    fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
    fn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.si"]
    fn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div2.di"]
    fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3div2.df"]
    fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
    fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
    fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
    fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
    fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
    fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather3div8.si"]
    fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.di"]
    fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3div4.df"]
    fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
    fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;

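    // Masked floating-point comparisons; `op` selects the comparison predicate.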
    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
    fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
    fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
    fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
    fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
    fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
    fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
    fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
    fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;

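    // Uniform shifts: every lane is shifted by the count held in the low 64 bits of `count`.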
    #[link_name = "llvm.x86.avx512.psll.d.512"]
    fn vpslld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrl.d.512"]
    fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psll.q.512"]
    fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrl.q.512"]
    fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;

    #[link_name = "llvm.x86.avx512.psra.d.512"]
    fn vpsrad(a: i32x16, count: i32x4) -> i32x16;

    #[link_name = "llvm.x86.avx512.psra.q.512"]
    fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psra.q.256"]
    fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx512.psra.q.128"]
    fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;

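    // Variable permutes: vpermilvar shuffles within 128-bit lanes, permvar across the whole vector.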
    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
    fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
    fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;

    #[link_name = "llvm.x86.avx512.permvar.si.512"]
    fn vpermd(a: i32x16, idx: i32x16) -> i32x16;

    #[link_name = "llvm.x86.avx512.permvar.di.512"]
    fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.permvar.di.256"]
    fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;

    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
    fn vpermps(a: f32x16, idx: i32x16) -> f32x16;

    #[link_name = "llvm.x86.avx512.permvar.df.512"]
    fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.permvar.df.256"]
    fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;

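    // Two-source permutes (vpermi2*): `idx` indexes into the concatenation of `a` and `b`.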
    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
    fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
    fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
    fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
    fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
    fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
    fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
    fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
    fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
    fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
    fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
    fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
    fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;

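    // Mask-driven compression: selected elements are packed contiguously into the low lanes.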
    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
    fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
    fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
    fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
    fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
    fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
    fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
    fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
    fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
    fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
    fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
    fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
    fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

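    // Compressing stores: selected elements are written contiguously to memory.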
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
    fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
    fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
    fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
    fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
    fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
    fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
    fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
    fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
    fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
    fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
    fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
    fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);

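    // Mask-driven expansion: consecutive source elements are spread out to the selected lanes.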
    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
    fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
    fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
    fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
    fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
    fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
    fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
    fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
    fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
    fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
    fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
    fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
    fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

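    // Scalar (ss/sd) operations; the trailing argument carries embedded rounding
    // control, or SAE where only exception suppression applies (e.g. max/min/getexp).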
    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
    fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
    fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
    fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
    fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
    fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
    fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
    fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
    fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
    fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
    fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
    fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
    fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
    fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
    fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
    fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
    fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
    fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
    fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
    fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
    fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.rcp14.ss"]
    fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.rcp14.sd"]
    fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
    fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
    fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
    fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
    fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
    fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
    fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
    fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
    fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
    fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
    fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
    fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
    fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;

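    // Scalar conversions to and from 32-bit integers, with rounding control.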
    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
    fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
    fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
    fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
    fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
    fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
    fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvttss2si"]
    fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttss2usi"]
    fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvttsd2si"]
    fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
    fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcomi.ss"]
    fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcomi.sd"]
    fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;

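    // Masked expanding loads from memory.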
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
    fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
    fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
    fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
    fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
    fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
    fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
    fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
    fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
    fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
    fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
    fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
    fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;

}

#[cfg(test)]
mod tests {
    use crate::core_arch::assert_eq_const as assert_eq;

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem;

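    // Writemask convention used throughout these tests: bit i of the mask
    // controls lane i, and the `setr` constructors list lanes starting from
    // lane 0, so a mask of 0b00000000_11111111 selects the first eight lanes.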
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_abs_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm512_abs_epi32(a);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_abs_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm512_mask_abs_epi32(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_abs_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm512_maskz_abs_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MAX.wrapping_add(1), 100, 100, 32,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_abs_epi32() {
        #[rustfmt::skip]
        let a = _mm256_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let r = _mm256_mask_abs_epi32(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
        // Lanes 4..7 are masked off and copied from `src` unchanged.
        #[rustfmt::skip]
        let e = _mm256_setr_epi32(
            0, 1, 1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m256i(r, e);
    }
45152
45153    #[simd_test(enable = "avx512f,avx512vl")]
45154    const fn test_mm256_maskz_abs_epi32() {
45155        #[rustfmt::skip]
45156        let a = _mm256_setr_epi32(
45157            0, 1, -1, i32::MAX,
45158            i32::MIN, 100, -100, -32,
45159        );
45160        let r = _mm256_maskz_abs_epi32(0, a);
45161        assert_eq_m256i(r, _mm256_setzero_si256());
45162        let r = _mm256_maskz_abs_epi32(0b00001111, a);
45163        #[rustfmt::skip]
45164        let e = _mm256_setr_epi32(
45165            0, 1, 1, i32::MAX,
45166            0, 0, 0, 0,
45167        );
45168        assert_eq_m256i(r, e);
45169    }
45170
45171    #[simd_test(enable = "avx512f,avx512vl")]
45172    const fn test_mm_mask_abs_epi32() {
45173        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
45174        let r = _mm_mask_abs_epi32(a, 0, a);
45175        assert_eq_m128i(r, a);
45176        let r = _mm_mask_abs_epi32(a, 0b00001111, a);
45177        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
45178        assert_eq_m128i(r, e);
45179    }
45180
45181    #[simd_test(enable = "avx512f,avx512vl")]
45182    const fn test_mm_maskz_abs_epi32() {
45183        let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
45184        let r = _mm_maskz_abs_epi32(0, a);
45185        assert_eq_m128i(r, _mm_setzero_si128());
45186        let r = _mm_maskz_abs_epi32(0b00001111, a);
45187        let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
45188        assert_eq_m128i(r, e);
45189    }
45190
45191    #[simd_test(enable = "avx512f")]
45192    const fn test_mm512_abs_ps() {
45193        #[rustfmt::skip]
45194        let a = _mm512_setr_ps(
45195            0., 1., -1., f32::MAX,
45196            f32::MIN, 100., -100., -32.,
45197            0., 1., -1., f32::MAX,
45198            f32::MIN, 100., -100., -32.,
45199        );
45200        let r = _mm512_abs_ps(a);
45201        #[rustfmt::skip]
45202        let e = _mm512_setr_ps(
45203            0., 1., 1., f32::MAX,
45204            f32::MAX, 100., 100., 32.,
45205            0., 1., 1., f32::MAX,
45206            f32::MAX, 100., 100., 32.,
45207        );
45208        assert_eq_m512(r, e);
45209    }
45210
45211    #[simd_test(enable = "avx512f")]
45212    const fn test_mm512_mask_abs_ps() {
45213        #[rustfmt::skip]
45214        let a = _mm512_setr_ps(
45215            0., 1., -1., f32::MAX,
45216            f32::MIN, 100., -100., -32.,
45217            0., 1., -1., f32::MAX,
45218            f32::MIN, 100., -100., -32.,
45219        );
45220        let r = _mm512_mask_abs_ps(a, 0, a);
45221        assert_eq_m512(r, a);
45222        let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
45223        #[rustfmt::skip]
45224        let e = _mm512_setr_ps(
45225            0., 1., 1., f32::MAX,
45226            f32::MAX, 100., 100., 32.,
45227            0., 1., -1., f32::MAX,
45228            f32::MIN, 100., -100., -32.,
45229        );
45230        assert_eq_m512(r, e);
45231    }
45232
45233    #[simd_test(enable = "avx512f")]
45234    const fn test_mm512_mask_mov_epi32() {
45235        let src = _mm512_set1_epi32(1);
45236        let a = _mm512_set1_epi32(2);
45237        let r = _mm512_mask_mov_epi32(src, 0, a);
45238        assert_eq_m512i(r, src);
45239        let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
45240        assert_eq_m512i(r, a);
45241    }
45242
45243    #[simd_test(enable = "avx512f")]
45244    const fn test_mm512_maskz_mov_epi32() {
45245        let a = _mm512_set1_epi32(2);
45246        let r = _mm512_maskz_mov_epi32(0, a);
45247        assert_eq_m512i(r, _mm512_setzero_si512());
45248        let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
45249        assert_eq_m512i(r, a);
45250    }
45251
45252    #[simd_test(enable = "avx512f,avx512vl")]
45253    const fn test_mm256_mask_mov_epi32() {
45254        let src = _mm256_set1_epi32(1);
45255        let a = _mm256_set1_epi32(2);
45256        let r = _mm256_mask_mov_epi32(src, 0, a);
45257        assert_eq_m256i(r, src);
45258        let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
45259        assert_eq_m256i(r, a);
45260    }
45261
45262    #[simd_test(enable = "avx512f,avx512vl")]
45263    const fn test_mm256_maskz_mov_epi32() {
45264        let a = _mm256_set1_epi32(2);
45265        let r = _mm256_maskz_mov_epi32(0, a);
45266        assert_eq_m256i(r, _mm256_setzero_si256());
45267        let r = _mm256_maskz_mov_epi32(0b11111111, a);
45268        assert_eq_m256i(r, a);
45269    }
45270
45271    #[simd_test(enable = "avx512f,avx512vl")]
45272    const fn test_mm_mask_mov_epi32() {
45273        let src = _mm_set1_epi32(1);
45274        let a = _mm_set1_epi32(2);
45275        let r = _mm_mask_mov_epi32(src, 0, a);
45276        assert_eq_m128i(r, src);
45277        let r = _mm_mask_mov_epi32(src, 0b00001111, a);
45278        assert_eq_m128i(r, a);
45279    }
45280
45281    #[simd_test(enable = "avx512f,avx512vl")]
45282    const fn test_mm_maskz_mov_epi32() {
45283        let a = _mm_set1_epi32(2);
45284        let r = _mm_maskz_mov_epi32(0, a);
45285        assert_eq_m128i(r, _mm_setzero_si128());
45286        let r = _mm_maskz_mov_epi32(0b00001111, a);
45287        assert_eq_m128i(r, a);
45288    }
45289
45290    #[simd_test(enable = "avx512f")]
45291    const fn test_mm512_mask_mov_ps() {
45292        let src = _mm512_set1_ps(1.);
45293        let a = _mm512_set1_ps(2.);
45294        let r = _mm512_mask_mov_ps(src, 0, a);
45295        assert_eq_m512(r, src);
45296        let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
45297        assert_eq_m512(r, a);
45298    }
45299
45300    #[simd_test(enable = "avx512f")]
45301    const fn test_mm512_maskz_mov_ps() {
45302        let a = _mm512_set1_ps(2.);
45303        let r = _mm512_maskz_mov_ps(0, a);
45304        assert_eq_m512(r, _mm512_setzero_ps());
45305        let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
45306        assert_eq_m512(r, a);
45307    }
45308
45309    #[simd_test(enable = "avx512f,avx512vl")]
45310    const fn test_mm256_mask_mov_ps() {
45311        let src = _mm256_set1_ps(1.);
45312        let a = _mm256_set1_ps(2.);
45313        let r = _mm256_mask_mov_ps(src, 0, a);
45314        assert_eq_m256(r, src);
45315        let r = _mm256_mask_mov_ps(src, 0b11111111, a);
45316        assert_eq_m256(r, a);
45317    }
45318
45319    #[simd_test(enable = "avx512f,avx512vl")]
45320    const fn test_mm256_maskz_mov_ps() {
45321        let a = _mm256_set1_ps(2.);
45322        let r = _mm256_maskz_mov_ps(0, a);
45323        assert_eq_m256(r, _mm256_setzero_ps());
45324        let r = _mm256_maskz_mov_ps(0b11111111, a);
45325        assert_eq_m256(r, a);
45326    }
45327
45328    #[simd_test(enable = "avx512f,avx512vl")]
45329    const fn test_mm_mask_mov_ps() {
45330        let src = _mm_set1_ps(1.);
45331        let a = _mm_set1_ps(2.);
45332        let r = _mm_mask_mov_ps(src, 0, a);
45333        assert_eq_m128(r, src);
45334        let r = _mm_mask_mov_ps(src, 0b00001111, a);
45335        assert_eq_m128(r, a);
45336    }
45337
45338    #[simd_test(enable = "avx512f,avx512vl")]
45339    const fn test_mm_maskz_mov_ps() {
45340        let a = _mm_set1_ps(2.);
45341        let r = _mm_maskz_mov_ps(0, a);
45342        assert_eq_m128(r, _mm_setzero_ps());
45343        let r = _mm_maskz_mov_ps(0b00001111, a);
45344        assert_eq_m128(r, a);
45345    }
45346
45347    #[simd_test(enable = "avx512f")]
45348    const fn test_mm512_add_epi32() {
45349        #[rustfmt::skip]
45350        let a = _mm512_setr_epi32(
45351            0, 1, -1, i32::MAX,
45352            i32::MIN, 100, -100, -32,
45353            0, 1, -1, i32::MAX,
45354            i32::MIN, 100, -100, -32,
45355        );
45356        let b = _mm512_set1_epi32(1);
45357        let r = _mm512_add_epi32(a, b);
45358        #[rustfmt::skip]
45359        let e = _mm512_setr_epi32(
45360            1, 2, 0, i32::MIN,
45361            i32::MIN + 1, 101, -99, -31,
45362            1, 2, 0, i32::MIN,
45363            i32::MIN + 1, 101, -99, -31,
45364        );
45365        assert_eq_m512i(r, e);
45366    }
45367
45368    #[simd_test(enable = "avx512f")]
45369    const fn test_mm512_mask_add_epi32() {
45370        #[rustfmt::skip]
45371        let a = _mm512_setr_epi32(
45372            0, 1, -1, i32::MAX,
45373            i32::MIN, 100, -100, -32,
45374            0, 1, -1, i32::MAX,
45375            i32::MIN, 100, -100, -32,
45376        );
45377        let b = _mm512_set1_epi32(1);
45378        let r = _mm512_mask_add_epi32(a, 0, a, b);
45379        assert_eq_m512i(r, a);
45380        let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
45381        #[rustfmt::skip]
45382        let e = _mm512_setr_epi32(
45383            1, 2, 0, i32::MIN,
45384            i32::MIN + 1, 101, -99, -31,
45385            0, 1, -1, i32::MAX,
45386            i32::MIN, 100, -100, -32,
45387        );
45388        assert_eq_m512i(r, e);
45389    }
45390
45391    #[simd_test(enable = "avx512f")]
45392    const fn test_mm512_maskz_add_epi32() {
45393        #[rustfmt::skip]
45394        let a = _mm512_setr_epi32(
45395            0, 1, -1, i32::MAX,
45396            i32::MIN, 100, -100, -32,
45397            0, 1, -1, i32::MAX,
45398            i32::MIN, 100, -100, -32,
45399        );
45400        let b = _mm512_set1_epi32(1);
45401        let r = _mm512_maskz_add_epi32(0, a, b);
45402        assert_eq_m512i(r, _mm512_setzero_si512());
45403        let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
45404        #[rustfmt::skip]
45405        let e = _mm512_setr_epi32(
45406            1, 2, 0, i32::MIN,
45407            i32::MIN + 1, 101, -99, -31,
45408            0, 0, 0, 0,
45409            0, 0, 0, 0,
45410        );
45411        assert_eq_m512i(r, e);
45412    }
45413
45414    #[simd_test(enable = "avx512f,avx512vl")]
45415    const fn test_mm256_mask_add_epi32() {
45416        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45417        let b = _mm256_set1_epi32(1);
45418        let r = _mm256_mask_add_epi32(a, 0, a, b);
45419        assert_eq_m256i(r, a);
45420        let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
45421        let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
45422        assert_eq_m256i(r, e);
45423    }
45424
45425    #[simd_test(enable = "avx512f,avx512vl")]
45426    const fn test_mm256_maskz_add_epi32() {
45427        let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
45428        let b = _mm256_set1_epi32(1);
45429        let r = _mm256_maskz_add_epi32(0, a, b);
45430        assert_eq_m256i(r, _mm256_setzero_si256());
45431        let r = _mm256_maskz_add_epi32(0b11111111, a, b);
45432        let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
45433        assert_eq_m256i(r, e);
45434    }
45435
45436    #[simd_test(enable = "avx512f,avx512vl")]
45437    const fn test_mm_mask_add_epi32() {
45438        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
45439        let b = _mm_set1_epi32(1);
45440        let r = _mm_mask_add_epi32(a, 0, a, b);
45441        assert_eq_m128i(r, a);
45442        let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
45443        let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
45444        assert_eq_m128i(r, e);
45445    }
45446
45447    #[simd_test(enable = "avx512f,avx512vl")]
45448    const fn test_mm_maskz_add_epi32() {
45449        let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
45450        let b = _mm_set1_epi32(1);
45451        let r = _mm_maskz_add_epi32(0, a, b);
45452        assert_eq_m128i(r, _mm_setzero_si128());
45453        let r = _mm_maskz_add_epi32(0b00001111, a, b);
45454        let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
45455        assert_eq_m128i(r, e);
45456    }
45457
45458    #[simd_test(enable = "avx512f")]
45459    const fn test_mm512_add_ps() {
45460        #[rustfmt::skip]
45461        let a = _mm512_setr_ps(
45462            0., 1., -1., f32::MAX,
45463            f32::MIN, 100., -100., -32.,
45464            0., 1., -1., f32::MAX,
45465            f32::MIN, 100., -100., -32.,
45466        );
45467        let b = _mm512_set1_ps(1.);
45468        let r = _mm512_add_ps(a, b);
45469        #[rustfmt::skip]
45470        let e = _mm512_setr_ps(
45471            1., 2., 0., f32::MAX,
45472            f32::MIN + 1., 101., -99., -31.,
45473            1., 2., 0., f32::MAX,
45474            f32::MIN + 1., 101., -99., -31.,
45475        );
45476        assert_eq_m512(r, e);
45477    }
45478
45479    #[simd_test(enable = "avx512f")]
45480    const fn test_mm512_mask_add_ps() {
45481        #[rustfmt::skip]
45482        let a = _mm512_setr_ps(
45483            0., 1., -1., f32::MAX,
45484            f32::MIN, 100., -100., -32.,
45485            0., 1., -1., f32::MAX,
45486            f32::MIN, 100., -100., -32.,
45487        );
45488        let b = _mm512_set1_ps(1.);
45489        let r = _mm512_mask_add_ps(a, 0, a, b);
45490        assert_eq_m512(r, a);
45491        let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_add_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_maskz_add_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            1., 2., 0., f32::MAX,
            f32::MIN + 1., 101., -99., -31.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_add_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_mask_add_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_add_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_maskz_add_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_add_ps(0b11111111, a, b);
        let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_add_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_mask_add_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_add_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_add_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_maskz_add_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_add_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_sub_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_sub_epi32(a, b);
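        // integer sub wraps on overflow: i32::MIN - 1 becomes i32::MAX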
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sub_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_mask_sub_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sub_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_sub_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            -1, 0, -2, i32::MAX - 1,
            i32::MAX, 99, -101, -33,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_sub_epi32() {
        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_mask_sub_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_sub_epi32() {
        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_sub_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_sub_epi32() {
        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(1);
        let r = _mm_mask_sub_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_sub_epi32() {
        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_sub_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sub_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_sub_ps(a, b);
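        // f32::MIN - 1.0 rounds back to f32::MIN (1.0 is far below its ULP)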
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_mask_sub_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sub_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(1.);
        let r = _mm512_maskz_sub_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            -1., 0., -2., f32::MAX - 1.,
            f32::MIN, 99., -101., -33.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_sub_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_mask_sub_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_sub_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(1.);
        let r = _mm256_maskz_sub_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_sub_ps(0b11111111, a, b);
        let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_sub_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_mask_sub_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_sub_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(1.);
        let r = _mm_maskz_sub_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_sub_ps(0b00001111, a, b);
        let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mullo_epi32(a, b);
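        // mullo keeps the low 32 bits of each product: i32::MAX * 2 wraps to -2, i32::MIN * 2 to 0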
        let e = _mm512_setr_epi32(
            0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_epi32(
            0, 2, -2, -2,
            0, 200, -200, -64,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_mullo_epi32() {
        #[rustfmt::skip]
        let a = _mm512_setr_epi32(
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
            0, 1, -1, i32::MAX,
            i32::MIN, 100, -100, -32,
        );
        let b = _mm512_set1_epi32(2);
        let r = _mm512_maskz_mullo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_mullo_epi32() {
        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_mullo_epi32() {
        let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_maskz_mullo_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_mullo_epi32() {
        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(2);
        let r = _mm_mask_mullo_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(2, -2, -2, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_mullo_epi32() {
        let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
        let b = _mm_set1_epi32(2);
        let r = _mm_maskz_mullo_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(2, -2, -2, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mul_ps(a, b);
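        // f32::MAX * 2.0 overflows to INFINITY, f32::MIN * 2.0 to NEG_INFINITY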
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mask_mul_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_mul_ps() {
        #[rustfmt::skip]
        let a = _mm512_setr_ps(
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
            0., 1., -1., f32::MAX,
            f32::MIN, 100., -100., -32.,
        );
        let b = _mm512_set1_ps(2.);
        let r = _mm512_maskz_mul_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_mul_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(2.);
        let r = _mm256_mask_mul_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_mul_ps() {
        let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
        let b = _mm256_set1_ps(2.);
        let r = _mm256_maskz_mul_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_mul_ps(0b11111111, a, b);
        #[rustfmt::skip]
        let e = _mm256_set_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_mul_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(2.);
        let r = _mm_mask_mul_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_mul_ps() {
        let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
        let b = _mm_set1_ps(2.);
        let r = _mm_maskz_mul_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_mul_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_div_ps(a, b);
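        // division by zero yields signed infinities: 100./0. is INFINITY, -131./0. is NEG_INFINITY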
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0.5, -0.5, 500.,
            f32::NEG_INFINITY, 50., -50., -16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_mask_div_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 1., -1., 1000.,
            -131., 100., -100., -32.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_div_ps() {
        let a = _mm512_setr_ps(
            0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
        );
        let b = _mm512_setr_ps(
            2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
        );
        let r = _mm512_maskz_div_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 0.5, -0.5, -1.,
            50., f32::INFINITY, -50., -16.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_div_ps() {
        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
        let r = _mm256_mask_div_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_div_ps() {
        let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
        let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
        let r = _mm256_maskz_div_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_div_ps(0b11111111, a, b);
        let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_div_ps() {
        let a = _mm_set_ps(100., 100., -100., -32.);
        let b = _mm_set_ps(2., 0., 2., 2.);
        let r = _mm_mask_div_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_div_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_div_ps() {
        let a = _mm_set_ps(100., 100., -100., -32.);
        let b = _mm_set_ps(2., 0., 2., 2.);
        let r = _mm_maskz_div_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_div_ps(0b00001111, a, b);
        let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epi32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_max_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_max_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_max_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_max_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_max_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_max_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_max_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_max_ps(a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_max_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_max_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_max_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_max_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_mask_max_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_max_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_maskz_max_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_max_ps(0b11111111, a, b);
        let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_max_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_mask_max_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_max_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(3., 2., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_max_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_maskz_max_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_max_ps(0b00001111, a, b);
        let e = _mm_set_ps(3., 2., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_max_epu32(a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_max_epu32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_max_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_max_epu32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_max_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_max_epu32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_max_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_max_epu32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_max_epu32(0b11111111, a, b);
        let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_max_epu32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_max_epu32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_max_epu32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_max_epu32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_max_epu32(0b00001111, a, b);
        let e = _mm_set_epi32(3, 2, 2, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epi32(a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_min_epi32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_min_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_min_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_min_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_min_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(0, 1, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_min_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_min_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(0, 1, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_min_ps(a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_mask_min_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_min_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_setr_ps(
            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
        );
        let r = _mm512_maskz_min_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_min_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_mask_min_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_min_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
        let r = _mm256_maskz_min_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_min_ps(0b11111111, a, b);
        let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_min_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_mask_min_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_min_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(0., 1., 1., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_min_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_maskz_min_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_min_ps(0b00001111, a, b);
        let e = _mm_set_ps(0., 1., 1., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_min_epu32(a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_mask_min_epu32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_min_epu32() {
        let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm512_maskz_min_epu32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_min_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_mask_min_epu32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_min_epu32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm256_maskz_min_epu32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_min_epu32(0b11111111, a, b);
        let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_min_epu32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_mask_min_epu32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(0, 1, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_min_epu32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let b = _mm_set_epi32(3, 2, 1, 0);
        let r = _mm_maskz_min_epu32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_min_epu32(0b00001111, a, b);
        let e = _mm_set_epi32(0, 1, 1, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_sqrt_ps(a);
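        // inputs are perfect squares, so the square roots are exact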
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_mask_sqrt_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sqrt_ps() {
        let a = _mm512_setr_ps(
            0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
        );
        let r = _mm512_maskz_sqrt_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_sqrt_ps() {
        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
        let r = _mm256_mask_sqrt_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_sqrt_ps() {
        let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
        let r = _mm256_maskz_sqrt_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_sqrt_ps(0b11111111, a);
        let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_sqrt_ps() {
        let a = _mm_set_ps(0., 1., 4., 9.);
        let r = _mm_mask_sqrt_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_sqrt_ps() {
        let a = _mm_set_ps(0., 1., 4., 9.);
        let r = _mm_maskz_sqrt_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_sqrt_ps(0b00001111, a);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fmadd_ps(a, b, c);
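        // fused multiply-add computes a * b + c with a single rounding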
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(2.);
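        // mask3 variants copy from c (not a) in lanes whose mask bit is clear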
        let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmsub_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fmsub_ps(a, b, c);
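        // fused multiply-subtract computes a * b - c with a single rounding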
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., 0., 1., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
46952        let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
46953        let e = _mm_set_ps(-1., 0., 1., 2.);
46954        assert_eq_m128(r, e);
46955    }
46956
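    // `fmaddsub` alternates per lane: even-indexed lanes compute `a * b - c`,
    // odd-indexed lanes compute `a * b + c`, which is why the expected
    // vectors below interleave the two results.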
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fmaddsub_ps(a, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmaddsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmaddsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmaddsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmaddsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmaddsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 0., 3., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmaddsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 0., 3., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmaddsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 0., 3., 2.);
        assert_eq_m128(r, e);
    }

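    // `fmsubadd` is the mirror image of `fmaddsub`: even-indexed lanes
    // compute `a * b + c`, odd-indexed lanes compute `a * b - c`.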
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fmsubadd_ps() {
        let a = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        let r = _mm512_fmsubadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fmsubadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fmsubadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fmsubadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fmsubadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., 2., 1., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fmsubadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., 2., 1., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fmsubadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(-1., 2., 1., 4.);
        assert_eq_m128(r, e);
    }

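    // `fnmadd` negates the product: each lane computes `-(a * b) + c`.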
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fnmadd_ps(a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fnmadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fnmadd_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fnmadd_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fnmadd_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fnmadd_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(1., 0., -1., -2.);
        assert_eq_m128(r, e);
    }

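    // `fnmsub` negates both terms: each lane computes `-(a * b) - c`.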
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_fnmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_fnmsub_ps(a, b, c);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_fnmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m512(r, a);
        let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_fnmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
        let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask3_fnmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_setr_ps(
            1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m512(r, c);
        let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
        let e = _mm512_setr_ps(
            -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_fnmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m256(r, a);
        let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_fnmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask3_fnmsub_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let c = _mm256_set1_ps(1.);
        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m256(r, c);
        let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
        let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_fnmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask_fnmsub_ps(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_fnmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_maskz_fnmsub_ps(0, a, b, c);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask3_fnmsub_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set_ps(0., 1., 2., 3.);
        let c = _mm_set1_ps(1.);
        let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
        let e = _mm_set_ps(-1., -2., -3., -4.);
        assert_eq_m128(r, e);
    }

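    // `rcp14` returns an approximate reciprocal with a maximum relative error
    // of 2^-14, so 1/3 comes back as 0.33333206 rather than the correctly
    // rounded 0.33333334.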
    #[simd_test(enable = "avx512f")]
    fn test_mm512_rcp14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_rcp14_ps(a);
        let e = _mm512_set1_ps(0.33333206);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_rcp14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rcp14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_rcp14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rcp14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
            0.33333206, 0.33333206, 0.33333206, 0.33333206,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_rcp14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_rcp14_ps(a);
        let e = _mm256_set1_ps(0.33333206);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_rcp14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_mask_rcp14_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
        let e = _mm256_set1_ps(0.33333206);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_rcp14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_maskz_rcp14_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_rcp14_ps(0b11111111, a);
        let e = _mm256_set1_ps(0.33333206);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_rcp14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_rcp14_ps(a);
        let e = _mm_set1_ps(0.33333206);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_rcp14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_mask_rcp14_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
        let e = _mm_set1_ps(0.33333206);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_rcp14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_maskz_rcp14_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_rcp14_ps(0b00001111, a);
        let e = _mm_set1_ps(0.33333206);
        assert_eq_m128(r, e);
    }

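    // `rsqrt14` likewise approximates 1/sqrt(x) to within a relative error of
    // 2^-14; 1/sqrt(3) comes back as 0.5773392.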
    #[simd_test(enable = "avx512f")]
    fn test_mm512_rsqrt14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_rsqrt14_ps(a);
        let e = _mm512_set1_ps(0.5773392);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_rsqrt14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_rsqrt14_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_rsqrt14_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_rsqrt14_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
            0.5773392, 0.5773392, 0.5773392,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_rsqrt14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_rsqrt14_ps(a);
        let e = _mm256_set1_ps(0.5773392);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_rsqrt14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_mask_rsqrt14_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
        let e = _mm256_set1_ps(0.5773392);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_rsqrt14_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_maskz_rsqrt14_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
        let e = _mm256_set1_ps(0.5773392);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_rsqrt14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_rsqrt14_ps(a);
        let e = _mm_set1_ps(0.5773392);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_rsqrt14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_mask_rsqrt14_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
        let e = _mm_set1_ps(0.5773392);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_rsqrt14_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_maskz_rsqrt14_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
        let e = _mm_set1_ps(0.5773392);
        assert_eq_m128(r, e);
    }

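    // `getexp` extracts the unbiased exponent as a float, i.e.
    // `floor(log2(|x|))`; for 3.0 that is 1.0.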
    #[simd_test(enable = "avx512f")]
    fn test_mm512_getexp_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_getexp_ps(a);
        let e = _mm512_set1_ps(1.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_getexp_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_mask_getexp_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_getexp_ps() {
        let a = _mm512_set1_ps(3.);
        let r = _mm512_maskz_getexp_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
        let e = _mm512_setr_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_getexp_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_getexp_ps(a);
        let e = _mm256_set1_ps(1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_getexp_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_mask_getexp_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
        let e = _mm256_set1_ps(1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_getexp_ps() {
        let a = _mm256_set1_ps(3.);
        let r = _mm256_maskz_getexp_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_getexp_ps(0b11111111, a);
        let e = _mm256_set1_ps(1.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_getexp_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_getexp_ps(a);
        let e = _mm_set1_ps(1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_getexp_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_getexp_ps(a, 0b00001111, a);
        let e = _mm_set1_ps(1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_getexp_ps() {
        let a = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_getexp_ps(0b00001111, a);
        let e = _mm_set1_ps(1.);
        assert_eq_m128(r, e);
    }

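    // `roundscale` rounds to a value with imm8[7:4] fraction bits using the
    // rounding mode in imm8[1:0]; imm8 = 0 rounds to the nearest integer, so
    // 1.1 becomes 1.0.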
    #[simd_test(enable = "avx512f")]
    fn test_mm512_roundscale_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_roundscale_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
        let e = _mm512_set1_ps(1.1);
        assert_eq_m512(r, e);
        let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_roundscale_ps() {
        let a = _mm512_set1_ps(1.1);
        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
        let e = _mm512_set1_ps(1.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_roundscale_ps() {
        let a = _mm256_set1_ps(1.1);
        let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
        let e = _mm256_set1_ps(1.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_roundscale_ps() {
        let a = _mm256_set1_ps(1.1);
        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
        let e = _mm256_set1_ps(1.1);
        assert_eq_m256(r, e);
        let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
        let e = _mm256_set1_ps(1.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_roundscale_ps() {
        let a = _mm256_set1_ps(1.1);
        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
        let e = _mm256_set1_ps(1.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_roundscale_ps() {
        let a = _mm_set1_ps(1.1);
        let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
        let e = _mm_set1_ps(1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_roundscale_ps() {
        let a = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
        let e = _mm_set1_ps(1.1);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
        let e = _mm_set1_ps(1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_roundscale_ps() {
        let a = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
        let e = _mm_set1_ps(1.0);
        assert_eq_m128(r, e);
    }

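    // `scalef` computes `a * 2^floor(b)` per lane: 1.0 * 2^3 = 8.0.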
    #[simd_test(enable = "avx512f")]
    fn test_mm512_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_scalef_ps(a, b);
        let e = _mm512_set1_ps(8.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_mask_scalef_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_scalef_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(3.);
        let r = _mm512_maskz_scalef_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_scalef_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(3.);
        let r = _mm256_scalef_ps(a, b);
        let e = _mm256_set1_ps(8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_scalef_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(3.);
        let r = _mm256_mask_scalef_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
        let e = _mm256_set1_ps(8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_scalef_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(3.);
        let r = _mm256_maskz_scalef_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
        let e = _mm256_set1_ps(8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_scalef_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_ps(a, b);
        let e = _mm_set1_ps(8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_scalef_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_scalef_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
        let e = _mm_set1_ps(8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_scalef_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_scalef_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_scalef_ps(0b00001111, a, b);
        let e = _mm_set1_ps(8.);
        assert_eq_m128(r, e);
    }

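    // `fixupimm` rewrites special values using per-lane 4-bit tokens taken
    // from `c` together with the imm8 flags; these smoke tests only verify
    // that NaN lanes come back as +0.0 for this imm8/table combination.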
    #[simd_test(enable = "avx512f")]
    fn test_mm512_fixupimm_ps() {
        let a = _mm512_set1_ps(f32::NAN);
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_fixupimm_ps::<5>(a, b, c);
        let e = _mm512_set1_ps(0.0);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_fixupimm_ps() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_fixupimm_ps() {
        #[rustfmt::skip]
        let a = _mm512_set_ps(
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
            1., 1., 1., 1.,
            1., 1., 1., 1.,
        );
        let b = _mm512_set1_ps(f32::MAX);
        let c = _mm512_set1_epi32(i32::MAX);
        let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_fixupimm_ps() {
        let a = _mm256_set1_ps(f32::NAN);
        let b = _mm256_set1_ps(f32::MAX);
        let c = _mm256_set1_epi32(i32::MAX);
        let r = _mm256_fixupimm_ps::<5>(a, b, c);
        let e = _mm256_set1_ps(0.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_fixupimm_ps() {
        let a = _mm256_set1_ps(f32::NAN);
        let b = _mm256_set1_ps(f32::MAX);
        let c = _mm256_set1_epi32(i32::MAX);
        let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
        let e = _mm256_set1_ps(0.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_fixupimm_ps() {
        let a = _mm256_set1_ps(f32::NAN);
        let b = _mm256_set1_ps(f32::MAX);
        let c = _mm256_set1_epi32(i32::MAX);
        let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
        let e = _mm256_set1_ps(0.0);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_fixupimm_ps() {
        let a = _mm_set1_ps(f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_ps::<5>(a, b, c);
        let e = _mm_set1_ps(0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_fixupimm_ps() {
        let a = _mm_set1_ps(f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
        let e = _mm_set1_ps(0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_fixupimm_ps() {
        let a = _mm_set1_ps(f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
        let e = _mm_set1_ps(0.0);
        assert_eq_m128(r, e);
    }

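    // For `ternarylogic`, imm8 is an 8-entry truth table indexed by the bit
    // triple (a, b, c): 0b1111_0000 reproduces `a`, 0b1001_0110 is a
    // three-way XOR, and 0b1110_1000 is the majority function.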
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_ternarylogic_epi32() {
        use core::intrinsics::simd::simd_xor;

        let a = _mm512_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm512_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm512_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm512_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m512i(r, a);

        // Bitwise xor.
        let r = _mm512_ternarylogic_epi32::<0b1001_0110>(a, b, c);
        let e = _mm512_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m512i(r, e);
        assert_eq_m512i(r, simd_xor(simd_xor(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm512_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm512_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_ternarylogic_epi32() {
        let src = _mm512_set1_epi32(1 << 2);
        let a = _mm512_set1_epi32(1 << 1);
        let b = _mm512_set1_epi32(1 << 0);
        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_ternarylogic_epi32() {
        let a = _mm512_set1_epi32(1 << 2);
        let b = _mm512_set1_epi32(1 << 1);
        let c = _mm512_set1_epi32(1 << 0);
        let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
        let e = _mm512_set1_epi32(0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_ternarylogic_epi32() {
        use core::intrinsics::simd::simd_xor;

        let _mm256_set4_epi32 = |a, b, c, d| _mm256_setr_epi32(a, b, c, d, a, b, c, d);

        let a = _mm256_set4_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm256_set4_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm256_set4_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm256_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m256i(r, a);

        // Bitwise xor.
        let r = _mm256_ternarylogic_epi32::<0b1001_0110>(a, b, c);
        let e = _mm256_set4_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m256i(r, e);
        assert_eq_m256i(r, simd_xor(simd_xor(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm256_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm256_set4_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_ternarylogic_epi32() {
        let src = _mm256_set1_epi32(1 << 2);
        let a = _mm256_set1_epi32(1 << 1);
        let b = _mm256_set1_epi32(1 << 0);
        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_ternarylogic_epi32() {
        let a = _mm256_set1_epi32(1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let c = _mm256_set1_epi32(1 << 0);
        let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
        let e = _mm256_set1_epi32(0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_ternarylogic_epi32() {
        use core::intrinsics::simd::simd_xor;

        let a = _mm_setr_epi32(0b100, 0b110, 0b001, 0b101);
        let b = _mm_setr_epi32(0b010, 0b011, 0b001, 0b110);
        let c = _mm_setr_epi32(0b001, 0b000, 0b001, 0b111);

        // Identity of A.
        let r = _mm_ternarylogic_epi32::<0b1111_0000>(a, b, c);
        assert_eq_m128i(r, a);

        // Bitwise xor.
        let r = _mm_ternarylogic_epi32::<0b1001_0110>(a, b, c);
        let e = _mm_setr_epi32(0b111, 0b101, 0b001, 0b100);
        assert_eq_m128i(r, e);
        assert_eq_m128i(r, simd_xor(simd_xor(a, b), c));

        // Majority (2 or more bits set).
        let r = _mm_ternarylogic_epi32::<0b1110_1000>(a, b, c);
        let e = _mm_setr_epi32(0b000, 0b010, 0b001, 0b111);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_ternarylogic_epi32() {
        let src = _mm_set1_epi32(1 << 2);
        let a = _mm_set1_epi32(1 << 1);
        let b = _mm_set1_epi32(1 << 0);
        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
        assert_eq_m128i(r, src);
        let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
        let e = _mm_set1_epi32(0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_ternarylogic_epi32() {
        let a = _mm_set1_epi32(1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let c = _mm_set1_epi32(1 << 0);
        let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
        let e = _mm_set1_epi32(0);
        assert_eq_m128i(r, e);
    }

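    // `getmant` extracts the mantissa normalized to the chosen interval:
    // 10.0 = 1.25 * 2^3, so both the [0.75, 1.5) and [1, 2) normalizations
    // yield 1.25.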
48210    #[simd_test(enable = "avx512f")]
48211    fn test_mm512_getmant_ps() {
48212        let a = _mm512_set1_ps(10.);
48213        let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
48214        let e = _mm512_set1_ps(1.25);
48215        assert_eq_m512(r, e);
48216    }
48217
48218    #[simd_test(enable = "avx512f")]
48219    fn test_mm512_mask_getmant_ps() {
48220        let a = _mm512_set1_ps(10.);
48221        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
48222        assert_eq_m512(r, a);
48223        let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
48224            a,
48225            0b11111111_00000000,
48226            a,
48227        );
48228        let e = _mm512_setr_ps(
48229            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
48230        );
48231        assert_eq_m512(r, e);
48232    }
48233
48234    #[simd_test(enable = "avx512f")]
48235    fn test_mm512_maskz_getmant_ps() {
48236        let a = _mm512_set1_ps(10.);
48237        let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
48238        assert_eq_m512(r, _mm512_setzero_ps());
48239        let r =
48240            _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
48241        let e = _mm512_setr_ps(
48242            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
48243        );
48244        assert_eq_m512(r, e);
48245    }
48246
48247    #[simd_test(enable = "avx512f,avx512vl")]
48248    fn test_mm256_getmant_ps() {
48249        let a = _mm256_set1_ps(10.);
48250        let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
48251        let e = _mm256_set1_ps(1.25);
48252        assert_eq_m256(r, e);
48253    }
48254
48255    #[simd_test(enable = "avx512f,avx512vl")]
48256    fn test_mm256_mask_getmant_ps() {
48257        let a = _mm256_set1_ps(10.);
48258        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
48259        assert_eq_m256(r, a);
48260        let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
48261        let e = _mm256_set1_ps(1.25);
48262        assert_eq_m256(r, e);
48263    }
48264
48265    #[simd_test(enable = "avx512f,avx512vl")]
48266    fn test_mm256_maskz_getmant_ps() {
48267        let a = _mm256_set1_ps(10.);
48268        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
48269        assert_eq_m256(r, _mm256_setzero_ps());
48270        let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
48271        let e = _mm256_set1_ps(1.25);
48272        assert_eq_m256(r, e);
48273    }
48274
48275    #[simd_test(enable = "avx512f,avx512vl")]
48276    fn test_mm_getmant_ps() {
48277        let a = _mm_set1_ps(10.);
48278        let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
48279        let e = _mm_set1_ps(1.25);
48280        assert_eq_m128(r, e);
48281    }
48282
48283    #[simd_test(enable = "avx512f,avx512vl")]
48284    fn test_mm_mask_getmant_ps() {
48285        let a = _mm_set1_ps(10.);
48286        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
48287        assert_eq_m128(r, a);
48288        let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
48289        let e = _mm_set1_ps(1.25);
48290        assert_eq_m128(r, e);
48291    }
48292
48293    #[simd_test(enable = "avx512f,avx512vl")]
48294    fn test_mm_maskz_getmant_ps() {
48295        let a = _mm_set1_ps(10.);
48296        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
48297        assert_eq_m128(r, _mm_setzero_ps());
48298        let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
48299        let e = _mm_set1_ps(1.25);
48300        assert_eq_m128(r, e);
48301    }
48302
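    // The *_round_ps tests use operands whose exact sum is not representable
    // in f32, making the rounding mode visible in the last ULP:
    // 0.00000007 + (-1.0) is ~-0.99999993, which rounds to -0.99999994 under
    // round-to-nearest but truncates to -0.9999999 under round-toward-zero.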
48303    #[simd_test(enable = "avx512f")]
48304    fn test_mm512_add_round_ps() {
48305        let a = _mm512_setr_ps(
48306            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
48307        );
48308        let b = _mm512_set1_ps(-1.);
48309        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
48310        #[rustfmt::skip]
48311        let e = _mm512_setr_ps(
48312            -1., 0.5, 1., 2.5,
48313            3., 4.5, 5., 6.5,
48314            7., 8.5, 9., 10.5,
48315            11., 12.5, 13., -0.99999994,
48316        );
48317        assert_eq_m512(r, e);
48318        let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
48319        let e = _mm512_setr_ps(
48320            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
48321        );
48322        assert_eq_m512(r, e);
48323    }
48324
48325    #[simd_test(enable = "avx512f")]
48326    fn test_mm512_mask_add_round_ps() {
48327        let a = _mm512_setr_ps(
48328            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
48329        );
48330        let b = _mm512_set1_ps(-1.);
48331        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
48332        assert_eq_m512(r, a);
48333        let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48334            a,
48335            0b11111111_00000000,
48336            a,
48337            b,
48338        );
48339        #[rustfmt::skip]
48340        let e = _mm512_setr_ps(
48341            0., 1.5, 2., 3.5,
48342            4., 5.5, 6., 7.5,
48343            7., 8.5, 9., 10.5,
48344            11., 12.5, 13., -0.99999994,
48345        );
48346        assert_eq_m512(r, e);
48347    }
48348
48349    #[simd_test(enable = "avx512f")]
48350    fn test_mm512_maskz_add_round_ps() {
48351        let a = _mm512_setr_ps(
48352            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
48353        );
48354        let b = _mm512_set1_ps(-1.);
48355        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
48356        assert_eq_m512(r, _mm512_setzero_ps());
48357        let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48358            0b11111111_00000000,
48359            a,
48360            b,
48361        );
48362        #[rustfmt::skip]
48363        let e = _mm512_setr_ps(
48364            0., 0., 0., 0.,
48365            0., 0., 0., 0.,
48366            7., 8.5, 9., 10.5,
48367            11., 12.5, 13., -0.99999994,
48368        );
48369        assert_eq_m512(r, e);
48370    }
48371
48372    #[simd_test(enable = "avx512f")]
48373    fn test_mm512_sub_round_ps() {
48374        let a = _mm512_setr_ps(
48375            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
48376        );
48377        let b = _mm512_set1_ps(1.);
48378        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
48379        #[rustfmt::skip]
48380        let e = _mm512_setr_ps(
48381            -1., 0.5, 1., 2.5,
48382            3., 4.5, 5., 6.5,
48383            7., 8.5, 9., 10.5,
48384            11., 12.5, 13., -0.99999994,
48385        );
48386        assert_eq_m512(r, e);
48387        let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
48388        let e = _mm512_setr_ps(
48389            -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
48390        );
48391        assert_eq_m512(r, e);
48392    }
48393
48394    #[simd_test(enable = "avx512f")]
48395    fn test_mm512_mask_sub_round_ps() {
48396        let a = _mm512_setr_ps(
48397            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
48398        );
48399        let b = _mm512_set1_ps(1.);
48400        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48401            a, 0, a, b,
48402        );
48403        assert_eq_m512(r, a);
48404        let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48405            a,
48406            0b11111111_00000000,
48407            a,
48408            b,
48409        );
48410        #[rustfmt::skip]
48411        let e = _mm512_setr_ps(
48412            0., 1.5, 2., 3.5,
48413            4., 5.5, 6., 7.5,
48414            7., 8.5, 9., 10.5,
48415            11., 12.5, 13., -0.99999994,
48416        );
48417        assert_eq_m512(r, e);
48418    }
48419
48420    #[simd_test(enable = "avx512f")]
48421    fn test_mm512_maskz_sub_round_ps() {
48422        let a = _mm512_setr_ps(
48423            0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
48424        );
48425        let b = _mm512_set1_ps(1.);
48426        let r =
48427            _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
48428        assert_eq_m512(r, _mm512_setzero_ps());
48429        let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48430            0b11111111_00000000,
48431            a,
48432            b,
48433        );
48434        #[rustfmt::skip]
48435        let e = _mm512_setr_ps(
48436            0., 0., 0., 0.,
48437            0., 0., 0., 0.,
48438            7., 8.5, 9., 10.5,
48439            11., 12.5, 13., -0.99999994,
48440        );
48441        assert_eq_m512(r, e);
48442    }
48443
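    // 0.1 has no exact f32 representation, so most of the products below
    // differ by one ULP between round-to-nearest and round-toward-zero
    // (e.g. 1.5 * 0.1 gives 0.15 vs. 0.14999999).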
48444    #[simd_test(enable = "avx512f")]
48445    fn test_mm512_mul_round_ps() {
48446        #[rustfmt::skip]
48447        let a = _mm512_setr_ps(
48448            0., 1.5, 2., 3.5,
48449            4., 5.5, 6., 7.5,
48450            8., 9.5, 10., 11.5,
48451            12., 13.5, 14., 0.00000000000000000000007,
48452        );
48453        let b = _mm512_set1_ps(0.1);
48454        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
48455        #[rustfmt::skip]
48456        let e = _mm512_setr_ps(
48457            0., 0.15, 0.2, 0.35,
48458            0.4, 0.55, 0.6, 0.75,
48459            0.8, 0.95, 1.0, 1.15,
48460            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
48461        );
48462        assert_eq_m512(r, e);
48463        let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
48464        #[rustfmt::skip]
48465        let e = _mm512_setr_ps(
48466            0., 0.14999999, 0.2, 0.35,
48467            0.4, 0.54999995, 0.59999996, 0.75,
48468            0.8, 0.95, 1.0, 1.15,
48469            1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
48470        );
48471        assert_eq_m512(r, e);
48472    }
48473
48474    #[simd_test(enable = "avx512f")]
48475    fn test_mm512_mask_mul_round_ps() {
48476        #[rustfmt::skip]
48477        let a = _mm512_setr_ps(
48478            0., 1.5, 2., 3.5,
48479            4., 5.5, 6., 7.5,
48480            8., 9.5, 10., 11.5,
48481            12., 13.5, 14., 0.00000000000000000000007,
48482        );
48483        let b = _mm512_set1_ps(0.1);
48484        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48485            a, 0, a, b,
48486        );
48487        assert_eq_m512(r, a);
48488        let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48489            a,
48490            0b11111111_00000000,
48491            a,
48492            b,
48493        );
48494        #[rustfmt::skip]
48495        let e = _mm512_setr_ps(
48496            0., 1.5, 2., 3.5,
48497            4., 5.5, 6., 7.5,
48498            0.8, 0.95, 1.0, 1.15,
48499            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
48500        );
48501        assert_eq_m512(r, e);
48502    }
48503
48504    #[simd_test(enable = "avx512f")]
48505    fn test_mm512_maskz_mul_round_ps() {
48506        #[rustfmt::skip]
48507        let a = _mm512_setr_ps(
48508            0., 1.5, 2., 3.5,
48509            4., 5.5, 6., 7.5,
48510            8., 9.5, 10., 11.5,
48511            12., 13.5, 14., 0.00000000000000000000007,
48512        );
48513        let b = _mm512_set1_ps(0.1);
48514        let r =
48515            _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
48516        assert_eq_m512(r, _mm512_setzero_ps());
48517        let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48518            0b11111111_00000000,
48519            a,
48520            b,
48521        );
48522        #[rustfmt::skip]
48523        let e = _mm512_setr_ps(
48524            0., 0., 0., 0.,
48525            0., 0., 0., 0.,
48526            0.8, 0.95, 1.0, 1.15,
48527            1.2, 1.35, 1.4, 0.000000000000000000000007000001,
48528        );
48529        assert_eq_m512(r, e);
48530    }
48531
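    // 1/3 is inexact in binary: round-to-nearest yields 0.33333334, while
    // round-toward-zero truncates to 0.3333333, one ULP below.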
48532    #[simd_test(enable = "avx512f")]
48533    fn test_mm512_div_round_ps() {
48534        let a = _mm512_set1_ps(1.);
48535        let b = _mm512_set1_ps(3.);
48536        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
48537        let e = _mm512_set1_ps(0.33333334);
48538        assert_eq_m512(r, e);
48539        let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
48540        let e = _mm512_set1_ps(0.3333333);
48541        assert_eq_m512(r, e);
48542    }
48543
48544    #[simd_test(enable = "avx512f")]
48545    fn test_mm512_mask_div_round_ps() {
48546        let a = _mm512_set1_ps(1.);
48547        let b = _mm512_set1_ps(3.);
48548        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48549            a, 0, a, b,
48550        );
48551        assert_eq_m512(r, a);
48552        let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48553            a,
48554            0b11111111_00000000,
48555            a,
48556            b,
48557        );
48558        let e = _mm512_setr_ps(
48559            1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
48560            0.33333334, 0.33333334, 0.33333334, 0.33333334,
48561        );
48562        assert_eq_m512(r, e);
48563    }
48564
48565    #[simd_test(enable = "avx512f")]
48566    fn test_mm512_maskz_div_round_ps() {
48567        let a = _mm512_set1_ps(1.);
48568        let b = _mm512_set1_ps(3.);
48569        let r =
48570            _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
48571        assert_eq_m512(r, _mm512_setzero_ps());
48572        let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48573            0b11111111_00000000,
48574            a,
48575            b,
48576        );
48577        let e = _mm512_setr_ps(
48578            0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
48579            0.33333334, 0.33333334, 0.33333334, 0.33333334,
48580        );
48581        assert_eq_m512(r, e);
48582    }
48583
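    // sqrt(3) = 1.7320508075... falls between two f32 neighbors: rounding to
    // nearest picks 1.7320508, rounding toward +infinity picks 1.7320509.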
48584    #[simd_test(enable = "avx512f")]
48585    fn test_mm512_sqrt_round_ps() {
48586        let a = _mm512_set1_ps(3.);
48587        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48588        let e = _mm512_set1_ps(1.7320508);
48589        assert_eq_m512(r, e);
48590        let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
48591        let e = _mm512_set1_ps(1.7320509);
48592        assert_eq_m512(r, e);
48593    }
48594
48595    #[simd_test(enable = "avx512f")]
48596    fn test_mm512_mask_sqrt_round_ps() {
48597        let a = _mm512_set1_ps(3.);
48598        let r =
48599            _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
48600        assert_eq_m512(r, a);
48601        let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48602            a,
48603            0b11111111_00000000,
48604            a,
48605        );
48606        let e = _mm512_setr_ps(
48607            3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
48608            1.7320508, 1.7320508, 1.7320508,
48609        );
48610        assert_eq_m512(r, e);
48611    }
48612
48613    #[simd_test(enable = "avx512f")]
48614    fn test_mm512_maskz_sqrt_round_ps() {
48615        let a = _mm512_set1_ps(3.);
48616        let r =
48617            _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
48618        assert_eq_m512(r, _mm512_setzero_ps());
48619        let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48620            0b11111111_00000000,
48621            a,
48622        );
48623        let e = _mm512_setr_ps(
48624            0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
48625            1.7320508, 1.7320508, 1.7320508,
48626        );
48627        assert_eq_m512(r, e);
48628    }
48629
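    // Fused multiply-add computes a * b + c with a single rounding at the
    // end; since b == 1 here, the results match the add_round tests above.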
48630    #[simd_test(enable = "avx512f")]
48631    fn test_mm512_fmadd_round_ps() {
48632        let a = _mm512_set1_ps(0.00000007);
48633        let b = _mm512_set1_ps(1.);
48634        let c = _mm512_set1_ps(-1.);
48635        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
48636        let e = _mm512_set1_ps(-0.99999994);
48637        assert_eq_m512(r, e);
48638        let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
48639        let e = _mm512_set1_ps(-0.9999999);
48640        assert_eq_m512(r, e);
48641    }
48642
48643    #[simd_test(enable = "avx512f")]
48644    fn test_mm512_mask_fmadd_round_ps() {
48645        let a = _mm512_set1_ps(0.00000007);
48646        let b = _mm512_set1_ps(1.);
48647        let c = _mm512_set1_ps(-1.);
48648        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48649            a, 0, b, c,
48650        );
48651        assert_eq_m512(r, a);
48652        let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48653            a,
48654            0b00000000_11111111,
48655            b,
48656            c,
48657        );
48658        #[rustfmt::skip]
48659        let e = _mm512_setr_ps(
48660            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48661            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48662            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48663            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48664        );
48665        assert_eq_m512(r, e);
48666    }
48667
48668    #[simd_test(enable = "avx512f")]
48669    fn test_mm512_maskz_fmadd_round_ps() {
48670        let a = _mm512_set1_ps(0.00000007);
48671        let b = _mm512_set1_ps(1.);
48672        let c = _mm512_set1_ps(-1.);
48673        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48674            0, a, b, c,
48675        );
48676        assert_eq_m512(r, _mm512_setzero_ps());
48677        #[rustfmt::skip]
48678        let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48679            0b00000000_11111111,
48680            a,
48681            b,
48682            c,
48683        );
48684        #[rustfmt::skip]
48685        let e = _mm512_setr_ps(
48686            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48687            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48688            0., 0., 0., 0.,
48689            0., 0., 0., 0.,
48690        );
48691        assert_eq_m512(r, e);
48692    }
48693
48694    #[simd_test(enable = "avx512f")]
48695    fn test_mm512_mask3_fmadd_round_ps() {
48696        let a = _mm512_set1_ps(0.00000007);
48697        let b = _mm512_set1_ps(1.);
48698        let c = _mm512_set1_ps(-1.);
48699        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48700            a, b, c, 0,
48701        );
48702        assert_eq_m512(r, c);
48703        let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48704            a,
48705            b,
48706            c,
48707            0b00000000_11111111,
48708        );
48709        #[rustfmt::skip]
48710        let e = _mm512_setr_ps(
48711            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48712            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48713            -1., -1., -1., -1.,
48714            -1., -1., -1., -1.,
48715        );
48716        assert_eq_m512(r, e);
48717    }
48718
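    // fmsub computes a * b - c; with c == 1 it mirrors the fmadd tests above.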
48719    #[simd_test(enable = "avx512f")]
48720    fn test_mm512_fmsub_round_ps() {
48721        let a = _mm512_set1_ps(0.00000007);
48722        let b = _mm512_set1_ps(1.);
48723        let c = _mm512_set1_ps(1.);
48724        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
48725        let e = _mm512_set1_ps(-0.99999994);
48726        assert_eq_m512(r, e);
48727        let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
48728        let e = _mm512_set1_ps(-0.9999999);
48729        assert_eq_m512(r, e);
48730    }
48731
48732    #[simd_test(enable = "avx512f")]
48733    fn test_mm512_mask_fmsub_round_ps() {
48734        let a = _mm512_set1_ps(0.00000007);
48735        let b = _mm512_set1_ps(1.);
48736        let c = _mm512_set1_ps(1.);
48737        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48738            a, 0, b, c,
48739        );
48740        assert_eq_m512(r, a);
48741        let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48742            a,
48743            0b00000000_11111111,
48744            b,
48745            c,
48746        );
48747        #[rustfmt::skip]
48748        let e = _mm512_setr_ps(
48749            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48750            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48751            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48752            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48753        );
48754        assert_eq_m512(r, e);
48755    }
48756
48757    #[simd_test(enable = "avx512f")]
48758    fn test_mm512_maskz_fmsub_round_ps() {
48759        let a = _mm512_set1_ps(0.00000007);
48760        let b = _mm512_set1_ps(1.);
48761        let c = _mm512_set1_ps(1.);
48762        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48763            0, a, b, c,
48764        );
48765        assert_eq_m512(r, _mm512_setzero_ps());
48766        let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48767            0b00000000_11111111,
48768            a,
48769            b,
48770            c,
48771        );
48772        #[rustfmt::skip]
48773        let e = _mm512_setr_ps(
48774            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48775            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48776            0., 0., 0., 0.,
48777            0., 0., 0., 0.,
48778        );
48779        assert_eq_m512(r, e);
48780    }
48781
48782    #[simd_test(enable = "avx512f")]
48783    fn test_mm512_mask3_fmsub_round_ps() {
48784        let a = _mm512_set1_ps(0.00000007);
48785        let b = _mm512_set1_ps(1.);
48786        let c = _mm512_set1_ps(1.);
48787        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48788            a, b, c, 0,
48789        );
48790        assert_eq_m512(r, c);
48791        let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48792            a,
48793            b,
48794            c,
48795            0b00000000_11111111,
48796        );
48797        #[rustfmt::skip]
48798        let e = _mm512_setr_ps(
48799            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48800            -0.99999994, -0.99999994, -0.99999994, -0.99999994,
48801            1., 1., 1., 1.,
48802            1., 1., 1., 1.,
48803        );
48804        assert_eq_m512(r, e);
48805    }
48806
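    // fmaddsub subtracts c in even lanes and adds it in odd lanes (fmsubadd
    // below does the opposite), hence the alternating pattern:
    // 0.00000007 - (-1.0) ~ 1.0000001 and 0.00000007 + (-1.0) ~ -0.99999994.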
48807    #[simd_test(enable = "avx512f")]
48808    fn test_mm512_fmaddsub_round_ps() {
48809        let a = _mm512_set1_ps(0.00000007);
48810        let b = _mm512_set1_ps(1.);
48811        let c = _mm512_set1_ps(-1.);
48812        let r =
48813            _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
48814        #[rustfmt::skip]
48815        let e = _mm512_setr_ps(
48816            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48817            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48818            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48819            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48820        );
48821        assert_eq_m512(r, e);
48822        let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
48823        let e = _mm512_setr_ps(
48824            1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
48825            -0.9999999, 1., -0.9999999, 1., -0.9999999,
48826        );
48827        assert_eq_m512(r, e);
48828    }
48829
48830    #[simd_test(enable = "avx512f")]
48831    fn test_mm512_mask_fmaddsub_round_ps() {
48832        let a = _mm512_set1_ps(0.00000007);
48833        let b = _mm512_set1_ps(1.);
48834        let c = _mm512_set1_ps(-1.);
48835        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48836            a, 0, b, c,
48837        );
48838        assert_eq_m512(r, a);
48839        let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48840            a,
48841            0b00000000_11111111,
48842            b,
48843            c,
48844        );
48845        #[rustfmt::skip]
48846        let e = _mm512_setr_ps(
48847            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48848            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48849            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48850            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48851        );
48852        assert_eq_m512(r, e);
48853    }
48854
48855    #[simd_test(enable = "avx512f")]
48856    fn test_mm512_maskz_fmaddsub_round_ps() {
48857        let a = _mm512_set1_ps(0.00000007);
48858        let b = _mm512_set1_ps(1.);
48859        let c = _mm512_set1_ps(-1.);
48860        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48861            0, a, b, c,
48862        );
48863        assert_eq_m512(r, _mm512_setzero_ps());
48864        let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48865            0b00000000_11111111,
48866            a,
48867            b,
48868            c,
48869        );
48870        #[rustfmt::skip]
48871        let e = _mm512_setr_ps(
48872            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48873            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48874            0., 0., 0., 0.,
48875            0., 0., 0., 0.,
48876        );
48877        assert_eq_m512(r, e);
48878    }
48879
48880    #[simd_test(enable = "avx512f")]
48881    fn test_mm512_mask3_fmaddsub_round_ps() {
48882        let a = _mm512_set1_ps(0.00000007);
48883        let b = _mm512_set1_ps(1.);
48884        let c = _mm512_set1_ps(-1.);
48885        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48886            a, b, c, 0,
48887        );
48888        assert_eq_m512(r, c);
48889        let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48890            a,
48891            b,
48892            c,
48893            0b00000000_11111111,
48894        );
48895        #[rustfmt::skip]
48896        let e = _mm512_setr_ps(
48897            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48898            1.0000001, -0.99999994, 1.0000001, -0.99999994,
48899            -1., -1., -1., -1.,
48900            -1., -1., -1., -1.,
48901        );
48902        assert_eq_m512(r, e);
48903    }
48904
48905    #[simd_test(enable = "avx512f")]
48906    fn test_mm512_fmsubadd_round_ps() {
48907        let a = _mm512_set1_ps(0.00000007);
48908        let b = _mm512_set1_ps(1.);
48909        let c = _mm512_set1_ps(-1.);
48910        let r =
48911            _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
48912        #[rustfmt::skip]
48913        let e = _mm512_setr_ps(
48914            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48915            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48916            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48917            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48918        );
48919        assert_eq_m512(r, e);
48920        let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
48921        let e = _mm512_setr_ps(
48922            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
48923            -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
48924        );
48925        assert_eq_m512(r, e);
48926    }
48927
48928    #[simd_test(enable = "avx512f")]
48929    fn test_mm512_mask_fmsubadd_round_ps() {
48930        let a = _mm512_set1_ps(0.00000007);
48931        let b = _mm512_set1_ps(1.);
48932        let c = _mm512_set1_ps(-1.);
48933        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48934            a, 0, b, c,
48935        );
48936        assert_eq_m512(r, a);
48937        let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48938            a,
48939            0b00000000_11111111,
48940            b,
48941            c,
48942        );
48943        #[rustfmt::skip]
48944        let e = _mm512_setr_ps(
48945            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48946            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48947            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48948            0.00000007, 0.00000007, 0.00000007, 0.00000007,
48949        );
48950        assert_eq_m512(r, e);
48951    }
48952
48953    #[simd_test(enable = "avx512f")]
48954    fn test_mm512_maskz_fmsubadd_round_ps() {
48955        let a = _mm512_set1_ps(0.00000007);
48956        let b = _mm512_set1_ps(1.);
48957        let c = _mm512_set1_ps(-1.);
48958        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48959            0, a, b, c,
48960        );
48961        assert_eq_m512(r, _mm512_setzero_ps());
48962        let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48963            0b00000000_11111111,
48964            a,
48965            b,
48966            c,
48967        );
48968        #[rustfmt::skip]
48969        let e = _mm512_setr_ps(
48970            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48971            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48972            0., 0., 0., 0.,
48973            0., 0., 0., 0.,
48974        );
48975        assert_eq_m512(r, e);
48976    }
48977
48978    #[simd_test(enable = "avx512f")]
48979    fn test_mm512_mask3_fmsubadd_round_ps() {
48980        let a = _mm512_set1_ps(0.00000007);
48981        let b = _mm512_set1_ps(1.);
48982        let c = _mm512_set1_ps(-1.);
48983        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48984            a, b, c, 0,
48985        );
48986        assert_eq_m512(r, c);
48987        let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48988            a,
48989            b,
48990            c,
48991            0b00000000_11111111,
48992        );
48993        #[rustfmt::skip]
48994        let e = _mm512_setr_ps(
48995            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48996            -0.99999994, 1.0000001, -0.99999994, 1.0000001,
48997            -1., -1., -1., -1.,
48998            -1., -1., -1., -1.,
48999        );
49000        assert_eq_m512(r, e);
49001    }
49002
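    // fnmadd computes -(a * b) + c and fnmsub computes -(a * b) - c; both are
    // ~0.99999993 here, again rounded according to the selected mode.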
49003    #[simd_test(enable = "avx512f")]
49004    fn test_mm512_fnmadd_round_ps() {
49005        let a = _mm512_set1_ps(0.00000007);
49006        let b = _mm512_set1_ps(1.);
49007        let c = _mm512_set1_ps(1.);
49008        let r =
49009            _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
49010        let e = _mm512_set1_ps(0.99999994);
49011        assert_eq_m512(r, e);
49012        let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
49013        let e = _mm512_set1_ps(0.9999999);
49014        assert_eq_m512(r, e);
49015    }
49016
49017    #[simd_test(enable = "avx512f")]
49018    fn test_mm512_mask_fnmadd_round_ps() {
49019        let a = _mm512_set1_ps(0.00000007);
49020        let b = _mm512_set1_ps(1.);
49021        let c = _mm512_set1_ps(1.);
49022        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49023            a, 0, b, c,
49024        );
49025        assert_eq_m512(r, a);
49026        let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49027            a,
49028            0b00000000_11111111,
49029            b,
49030            c,
49031        );
49032        let e = _mm512_setr_ps(
49033            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49034            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
49035            0.00000007, 0.00000007,
49036        );
49037        assert_eq_m512(r, e);
49038    }
49039
49040    #[simd_test(enable = "avx512f")]
49041    fn test_mm512_maskz_fnmadd_round_ps() {
49042        let a = _mm512_set1_ps(0.00000007);
49043        let b = _mm512_set1_ps(1.);
49044        let c = _mm512_set1_ps(1.);
49045        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49046            0, a, b, c,
49047        );
49048        assert_eq_m512(r, _mm512_setzero_ps());
49049        let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49050            0b00000000_11111111,
49051            a,
49052            b,
49053            c,
49054        );
49055        let e = _mm512_setr_ps(
49056            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49057            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
49058        );
49059        assert_eq_m512(r, e);
49060    }
49061
49062    #[simd_test(enable = "avx512f")]
49063    fn test_mm512_mask3_fnmadd_round_ps() {
49064        let a = _mm512_set1_ps(0.00000007);
49065        let b = _mm512_set1_ps(1.);
49066        let c = _mm512_set1_ps(1.);
49067        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49068            a, b, c, 0,
49069        );
49070        assert_eq_m512(r, c);
49071        let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49072            a,
49073            b,
49074            c,
49075            0b00000000_11111111,
49076        );
49077        let e = _mm512_setr_ps(
49078            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49079            0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
49080        );
49081        assert_eq_m512(r, e);
49082    }
49083
49084    #[simd_test(enable = "avx512f")]
49085    fn test_mm512_fnmsub_round_ps() {
49086        let a = _mm512_set1_ps(0.00000007);
49087        let b = _mm512_set1_ps(1.);
49088        let c = _mm512_set1_ps(-1.);
49089        let r =
49090            _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
49091        let e = _mm512_set1_ps(0.99999994);
49092        assert_eq_m512(r, e);
49093        let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
49094        let e = _mm512_set1_ps(0.9999999);
49095        assert_eq_m512(r, e);
49096    }
49097
49098    #[simd_test(enable = "avx512f")]
49099    fn test_mm512_mask_fnmsub_round_ps() {
49100        let a = _mm512_set1_ps(0.00000007);
49101        let b = _mm512_set1_ps(1.);
49102        let c = _mm512_set1_ps(-1.);
49103        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49104            a, 0, b, c,
49105        );
49106        assert_eq_m512(r, a);
49107        let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49108            a,
49109            0b00000000_11111111,
49110            b,
49111            c,
49112        );
49113        let e = _mm512_setr_ps(
49114            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49115            0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
49116            0.00000007, 0.00000007,
49117        );
49118        assert_eq_m512(r, e);
49119    }
49120
49121    #[simd_test(enable = "avx512f")]
49122    fn test_mm512_maskz_fnmsub_round_ps() {
49123        let a = _mm512_set1_ps(0.00000007);
49124        let b = _mm512_set1_ps(1.);
49125        let c = _mm512_set1_ps(-1.);
49126        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49127            0, a, b, c,
49128        );
49129        assert_eq_m512(r, _mm512_setzero_ps());
49130        let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49131            0b00000000_11111111,
49132            a,
49133            b,
49134            c,
49135        );
49136        let e = _mm512_setr_ps(
49137            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49138            0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
49139        );
49140        assert_eq_m512(r, e);
49141    }
49142
49143    #[simd_test(enable = "avx512f")]
49144    fn test_mm512_mask3_fnmsub_round_ps() {
49145        let a = _mm512_set1_ps(0.00000007);
49146        let b = _mm512_set1_ps(1.);
49147        let c = _mm512_set1_ps(-1.);
49148        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49149            a, b, c, 0,
49150        );
49151        assert_eq_m512(r, c);
49152        let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49153            a,
49154            b,
49155            c,
49156            0b00000000_11111111,
49157        );
49158        let e = _mm512_setr_ps(
49159            0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
49160            0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
49161        );
49162        assert_eq_m512(r, e);
49163    }
49164
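    // max/min results are exact, so the rounding const only controls
    // floating-point exception suppression (SAE); _MM_FROUND_CUR_DIRECTION
    // keeps the current MXCSR behavior.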
49165    #[simd_test(enable = "avx512f")]
49166    fn test_mm512_max_round_ps() {
49167        let a = _mm512_setr_ps(
49168            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49169        );
49170        let b = _mm512_setr_ps(
49171            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49172        );
49173        let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
49174        let e = _mm512_setr_ps(
49175            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
49176        );
49177        assert_eq_m512(r, e);
49178    }
49179
49180    #[simd_test(enable = "avx512f")]
49181    fn test_mm512_mask_max_round_ps() {
49182        let a = _mm512_setr_ps(
49183            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49184        );
49185        let b = _mm512_setr_ps(
49186            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49187        );
49188        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
49189        assert_eq_m512(r, a);
49190        let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
49191        let e = _mm512_setr_ps(
49192            15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
49193        );
49194        assert_eq_m512(r, e);
49195    }
49196
49197    #[simd_test(enable = "avx512f")]
49198    fn test_mm512_maskz_max_round_ps() {
49199        let a = _mm512_setr_ps(
49200            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49201        );
49202        let b = _mm512_setr_ps(
49203            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49204        );
49205        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
49206        assert_eq_m512(r, _mm512_setzero_ps());
49207        let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
49208        let e = _mm512_setr_ps(
49209            15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
49210        );
49211        assert_eq_m512(r, e);
49212    }
49213
49214    #[simd_test(enable = "avx512f")]
49215    fn test_mm512_min_round_ps() {
49216        let a = _mm512_setr_ps(
49217            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49218        );
49219        let b = _mm512_setr_ps(
49220            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49221        );
49222        let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
49223        let e = _mm512_setr_ps(
49224            0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
49225        );
49226        assert_eq_m512(r, e);
49227    }
49228
49229    #[simd_test(enable = "avx512f")]
49230    fn test_mm512_mask_min_round_ps() {
49231        let a = _mm512_setr_ps(
49232            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49233        );
49234        let b = _mm512_setr_ps(
49235            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49236        );
49237        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
49238        assert_eq_m512(r, a);
49239        let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
49240        let e = _mm512_setr_ps(
49241            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49242        );
49243        assert_eq_m512(r, e);
49244    }
49245
49246    #[simd_test(enable = "avx512f")]
49247    fn test_mm512_maskz_min_round_ps() {
49248        let a = _mm512_setr_ps(
49249            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
49250        );
49251        let b = _mm512_setr_ps(
49252            15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
49253        );
49254        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
49255        assert_eq_m512(r, _mm512_setzero_ps());
49256        let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
49257        let e = _mm512_setr_ps(
49258            0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
49259        );
49260        assert_eq_m512(r, e);
49261    }
49262
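    // getexp returns each element's unbiased exponent as a float, i.e.
    // floor(log2(|x|)): getexp(3.0) == 1.0.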
49263    #[simd_test(enable = "avx512f")]
49264    fn test_mm512_getexp_round_ps() {
49265        let a = _mm512_set1_ps(3.);
49266        let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
49267        let e = _mm512_set1_ps(1.);
49268        assert_eq_m512(r, e);
49269    }
49270
49271    #[simd_test(enable = "avx512f")]
49272    fn test_mm512_mask_getexp_round_ps() {
49273        let a = _mm512_set1_ps(3.);
49274        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
49275        assert_eq_m512(r, a);
49276        let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
49277        let e = _mm512_setr_ps(
49278            3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
49279        );
49280        assert_eq_m512(r, e);
49281    }
49282
49283    #[simd_test(enable = "avx512f")]
49284    fn test_mm512_maskz_getexp_round_ps() {
49285        let a = _mm512_set1_ps(3.);
49286        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
49287        assert_eq_m512(r, _mm512_setzero_ps());
49288        let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
49289        let e = _mm512_setr_ps(
49290            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
49291        );
49292        assert_eq_m512(r, e);
49293    }
49294
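    // roundscale rounds to the power-of-two precision encoded in the upper
    // bits of the immediate; IMM8 == 0 means zero fraction bits with
    // nearest-even rounding, so 1.1 rounds to 1.0.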
49295    #[simd_test(enable = "avx512f")]
49296    fn test_mm512_roundscale_round_ps() {
49297        let a = _mm512_set1_ps(1.1);
49298        let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
49299        let e = _mm512_set1_ps(1.0);
49300        assert_eq_m512(r, e);
49301    }
49302
49303    #[simd_test(enable = "avx512f")]
49304    fn test_mm512_mask_roundscale_round_ps() {
49305        let a = _mm512_set1_ps(1.1);
49306        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
49307        let e = _mm512_set1_ps(1.1);
49308        assert_eq_m512(r, e);
49309        let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
49310            a,
49311            0b11111111_11111111,
49312            a,
49313        );
49314        let e = _mm512_set1_ps(1.0);
49315        assert_eq_m512(r, e);
49316    }
49317
49318    #[simd_test(enable = "avx512f")]
49319    fn test_mm512_maskz_roundscale_round_ps() {
49320        let a = _mm512_set1_ps(1.1);
49321        let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
49322        assert_eq_m512(r, _mm512_setzero_ps());
49323        let r =
49324            _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
49325        let e = _mm512_set1_ps(1.0);
49326        assert_eq_m512(r, e);
49327    }
49328
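    // scalef computes a * 2^floor(b) per element: 1.0 * 2^3 == 8.0.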
49329    #[simd_test(enable = "avx512f")]
49330    fn test_mm512_scalef_round_ps() {
49331        let a = _mm512_set1_ps(1.);
49332        let b = _mm512_set1_ps(3.);
49333        let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
49334        let e = _mm512_set1_ps(8.);
49335        assert_eq_m512(r, e);
49336    }
49337
49338    #[simd_test(enable = "avx512f")]
49339    fn test_mm512_mask_scalef_round_ps() {
49340        let a = _mm512_set1_ps(1.);
49341        let b = _mm512_set1_ps(3.);
49342        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49343            a, 0, a, b,
49344        );
49345        assert_eq_m512(r, a);
49346        let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49347            a,
49348            0b11111111_00000000,
49349            a,
49350            b,
49351        );
49352        let e = _mm512_set_ps(
49353            8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
49354        );
49355        assert_eq_m512(r, e);
49356    }
49357
49358    #[simd_test(enable = "avx512f")]
49359    fn test_mm512_maskz_scalef_round_ps() {
49360        let a = _mm512_set1_ps(1.);
49361        let b = _mm512_set1_ps(3.);
49362        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49363            0, a, b,
49364        );
49365        assert_eq_m512(r, _mm512_setzero_ps());
49366        let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49367            0b11111111_00000000,
49368            a,
49369            b,
49370        );
49371        let e = _mm512_set_ps(
49372            8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
49373        );
49374        assert_eq_m512(r, e);
49375    }
49376
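    // Per Intel's pseudocode, fixupimm classifies each element of `b` into a
    // token, and the matching 4-bit entry of the table in `c` selects the
    // output; with the table i32::MAX the "positive value" token maps to the
    // signed-zero response, so fixed-up lanes compare equal to 0.0.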
49377    #[simd_test(enable = "avx512f")]
49378    fn test_mm512_fixupimm_round_ps() {
49379        let a = _mm512_set1_ps(f32::NAN);
49380        let b = _mm512_set1_ps(f32::MAX);
49381        let c = _mm512_set1_epi32(i32::MAX);
49382        let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
49383        let e = _mm512_set1_ps(0.0);
49384        assert_eq_m512(r, e);
49385    }
49386
49387    #[simd_test(enable = "avx512f")]
49388    fn test_mm512_mask_fixupimm_round_ps() {
49389        #[rustfmt::skip]
49390        let a = _mm512_set_ps(
49391            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
49392            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
49393            1., 1., 1., 1.,
49394            1., 1., 1., 1.,
49395        );
49396        let b = _mm512_set1_ps(f32::MAX);
49397        let c = _mm512_set1_epi32(i32::MAX);
49398        let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
49399            a,
49400            0b11111111_00000000,
49401            b,
49402            c,
49403        );
49404        let e = _mm512_set_ps(
49405            0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
49406        );
49407        assert_eq_m512(r, e);
49408    }
49409
49410    #[simd_test(enable = "avx512f")]
49411    fn test_mm512_maskz_fixupimm_round_ps() {
49412        #[rustfmt::skip]
49413        let a = _mm512_set_ps(
49414            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
49415            f32::NAN, f32::NAN, f32::NAN, f32::NAN,
49416            1., 1., 1., 1.,
49417            1., 1., 1., 1.,
49418        );
49419        let b = _mm512_set1_ps(f32::MAX);
49420        let c = _mm512_set1_epi32(i32::MAX);
49421        let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
49422            0b11111111_00000000,
49423            a,
49424            b,
49425            c,
49426        );
49427        let e = _mm512_set_ps(
49428            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
49429        );
49430        assert_eq_m512(r, e);
49431    }
49432
49433    #[simd_test(enable = "avx512f")]
49434    fn test_mm512_getmant_round_ps() {
49435        let a = _mm512_set1_ps(10.);
49436        let r = _mm512_getmant_round_ps::<
49437            _MM_MANT_NORM_1_2,
49438            _MM_MANT_SIGN_SRC,
49439            _MM_FROUND_CUR_DIRECTION,
49440        >(a);
49441        let e = _mm512_set1_ps(1.25);
49442        assert_eq_m512(r, e);
49443    }
49444
49445    #[simd_test(enable = "avx512f")]
49446    fn test_mm512_mask_getmant_round_ps() {
49447        let a = _mm512_set1_ps(10.);
49448        let r = _mm512_mask_getmant_round_ps::<
49449            _MM_MANT_NORM_1_2,
49450            _MM_MANT_SIGN_SRC,
49451            _MM_FROUND_CUR_DIRECTION,
49452        >(a, 0, a);
49453        assert_eq_m512(r, a);
49454        let r = _mm512_mask_getmant_round_ps::<
49455            _MM_MANT_NORM_1_2,
49456            _MM_MANT_SIGN_SRC,
49457            _MM_FROUND_CUR_DIRECTION,
49458        >(a, 0b11111111_00000000, a);
49459        let e = _mm512_setr_ps(
49460            10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
49461        );
49462        assert_eq_m512(r, e);
49463    }
49464
49465    #[simd_test(enable = "avx512f")]
49466    fn test_mm512_maskz_getmant_round_ps() {
49467        let a = _mm512_set1_ps(10.);
49468        let r = _mm512_maskz_getmant_round_ps::<
49469            _MM_MANT_NORM_1_2,
49470            _MM_MANT_SIGN_SRC,
49471            _MM_FROUND_CUR_DIRECTION,
49472        >(0, a);
49473        assert_eq_m512(r, _mm512_setzero_ps());
49474        let r = _mm512_maskz_getmant_round_ps::<
49475            _MM_MANT_NORM_1_2,
49476            _MM_MANT_SIGN_SRC,
49477            _MM_FROUND_CUR_DIRECTION,
49478        >(0b11111111_00000000, a);
49479        let e = _mm512_setr_ps(
49480            0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
49481        );
49482        assert_eq_m512(r, e);
49483    }
49484
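    // cvtps_epi32 uses the current rounding mode (nearest-even by default),
    // so halfway cases round to the even integer: -3.5 -> -4, 9.5 -> 10.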
49485    #[simd_test(enable = "avx512f")]
49486    fn test_mm512_cvtps_epi32() {
49487        let a = _mm512_setr_ps(
49488            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49489        );
49490        let r = _mm512_cvtps_epi32(a);
49491        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49492        assert_eq_m512i(r, e);
49493    }
49494
49495    #[simd_test(enable = "avx512f")]
49496    fn test_mm512_mask_cvtps_epi32() {
49497        let a = _mm512_setr_ps(
49498            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49499        );
49500        let src = _mm512_set1_epi32(0);
49501        let r = _mm512_mask_cvtps_epi32(src, 0, a);
49502        assert_eq_m512i(r, src);
49503        let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
49504        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
49505        assert_eq_m512i(r, e);
49506    }
49507
49508    #[simd_test(enable = "avx512f")]
49509    fn test_mm512_maskz_cvtps_epi32() {
49510        let a = _mm512_setr_ps(
49511            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49512        );
49513        let r = _mm512_maskz_cvtps_epi32(0, a);
49514        assert_eq_m512i(r, _mm512_setzero_si512());
49515        let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
49516        let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
49517        assert_eq_m512i(r, e);
49518    }
49519
49520    #[simd_test(enable = "avx512f,avx512vl")]
49521    fn test_mm256_mask_cvtps_epi32() {
49522        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49523        let src = _mm256_set1_epi32(0);
49524        let r = _mm256_mask_cvtps_epi32(src, 0, a);
49525        assert_eq_m256i(r, src);
49526        let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
49527        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
49528        assert_eq_m256i(r, e);
49529    }
49530
49531    #[simd_test(enable = "avx512f,avx512vl")]
49532    fn test_mm256_maskz_cvtps_epi32() {
49533        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49534        let r = _mm256_maskz_cvtps_epi32(0, a);
49535        assert_eq_m256i(r, _mm256_setzero_si256());
49536        let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
49537        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
49538        assert_eq_m256i(r, e);
49539    }
49540
49541    #[simd_test(enable = "avx512f,avx512vl")]
49542    fn test_mm_mask_cvtps_epi32() {
49543        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49544        let src = _mm_set1_epi32(0);
49545        let r = _mm_mask_cvtps_epi32(src, 0, a);
49546        assert_eq_m128i(r, src);
49547        let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
49548        let e = _mm_set_epi32(12, 14, 14, 16);
49549        assert_eq_m128i(r, e);
49550    }
49551
49552    #[simd_test(enable = "avx512f,avx512vl")]
49553    fn test_mm_maskz_cvtps_epi32() {
49554        let a = _mm_set_ps(12., 13.5, 14., 15.5);
49555        let r = _mm_maskz_cvtps_epi32(0, a);
49556        assert_eq_m128i(r, _mm_setzero_si128());
49557        let r = _mm_maskz_cvtps_epi32(0b00001111, a);
49558        let e = _mm_set_epi32(12, 14, 14, 16);
49559        assert_eq_m128i(r, e);
49560    }
49561
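    // Negative inputs are out of range for an unsigned conversion: the
    // hardware returns 0xFFFFFFFF, which these tests spell as -1 through the
    // signed set_epi32 helpers.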
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvtps_epu32(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvtps_epu32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvtps_epu32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_cvtps_epu32(a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let src = _mm256_set1_epi32(0);
        let r = _mm256_mask_cvtps_epu32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtps_epu32() {
        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
        let r = _mm256_maskz_cvtps_epu32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
        let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_cvtps_epu32(a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let src = _mm_set1_epi32(0);
        let r = _mm_mask_cvtps_epu32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtps_epu32() {
        let a = _mm_set_ps(12., 13.5, 14., 15.5);
        let r = _mm_maskz_cvtps_epu32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtps_epu32(0b00001111, a);
        let e = _mm_set_epi32(12, 14, 14, 16);
        assert_eq_m128i(r, e);
    }

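    // The cvtepi8_epi32 family sign-extends each 8-bit lane to 32 bits
    // (vpmovsxbd); the sample values here are all non-negative, so the
    // expected vectors simply repeat the inputs.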
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi8_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi8_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepi8_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepi8_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepi8_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

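    // The cvtepu8_epi32 family zero-extends each 8-bit lane (vpmovzxbd); for
    // the non-negative inputs used here the results match the sign-extending
    // variant above.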
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu8_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu8_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepu8_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepu8_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepu8_epi32() {
        let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepu8_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

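    // These sign-extend 16-bit lanes (vpmovsxwd). Note that `_mm512_set_*`
    // lists lanes from the highest index down to lane 0, so a low-half mask
    // such as 0b00000000_11111111 selects the *last* eight set arguments.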
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi16_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
        let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepi16_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi16_epi32() {
        let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_maskz_cvtepi16_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
        let e = _mm_set_epi32(4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu16_epi32(a);
        let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_epi32(-1);
        let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
        let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu16_epi32() {
        let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu16_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi32(-1);
        let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm256_maskz_cvtepu16_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi32(-1);
        let r = _mm_mask_cvtepu16_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepu16_epi32() {
        let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_maskz_cvtepu16_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
        let e = _mm_set_epi32(12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

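    // Integer-to-float conversion is exact for the small magnitudes used here
    // (every integer of magnitude at most 2^24 is representable in f32), so
    // the expected vectors are the plain float counterparts.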
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi32_ps() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm256_set1_ps(-1.);
        let r = _mm256_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi32_ps() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtepi32_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
        let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_cvtepi32_ps() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let src = _mm_set1_ps(-1.);
        let r = _mm_mask_cvtepi32_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_cvtepi32_ps() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let r = _mm_maskz_cvtepi32_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepu32_ps(a);
        let e = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm512_set1_ps(-1.);
        let r = _mm512_mask_cvtepu32_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
        let e = _mm512_set_ps(
            -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepu32_ps() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepu32_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        assert_eq_m512(r, e);
    }

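    // The cvtepi32_epi16 family truncates each lane to its low 16 bits
    // (vpmovdw) without saturating; the saturating behavior is covered by the
    // cvtsepi32/cvtusepi32 tests further down.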
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_epi16() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_cvtepi32_epi16(a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi16(-1);
        let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_cvtepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

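    // With a 128-bit source the narrowed result occupies only the low 64 bits
    // of the destination and the upper words are zeroed, hence the leading
    // zeros in the expected vectors.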
    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_cvtepi32_epi16(a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtepi32_epi8() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_cvtepi32_epi8(a);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_cvtepi32_epi8() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_cvtepi32_epi8() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtepi32_epi8() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_cvtepi32_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtepi32_epi8() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtepi32_epi8() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtepi32_epi8() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_cvtepi32_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtepi32_epi8() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtepi32_epi8() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

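    // The cvtsepi32_epi16 family narrows with signed saturation (vpmovsdw):
    // i32::MIN clamps to i16::MIN and i32::MAX to i16::MAX, which is what
    // distinguishes these expectations from plain truncation.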
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtsepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi16(a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            -1, -1, -1, -1,
            -1, -1, -1, -1,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtsepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm256_set_epi16(
            0, 0, 0, 0,
            0, 0, 0, 0,
            8, 9, 10, 11,
            12, 13, i16::MIN, i16::MAX,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtsepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_cvtsepi32_epi16(a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let src = _mm_set1_epi16(-1);
        let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtsepi32_epi16() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtsepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_cvtsepi32_epi16(a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi32_epi16(src, 0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtsepi32_epi16() {
        let a = _mm_set_epi32(4, 5, 6, 7);
        let r = _mm_maskz_cvtsepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi32_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

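    // Same signed-saturation rule narrowed to 8 bits (vpmovsdb): i32::MIN
    // clamps to i8::MIN and i32::MAX to i8::MAX.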
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtsepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtsepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            -1, -1, -1, -1,
            -1, -1, -1, -1,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtsepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MAX,
        );
        let r = _mm512_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            8, 9, 10, 11,
            12, 13, i8::MIN, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtsepi32_epi8() {
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtsepi32_epi8() {
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtsepi32_epi8() {
        let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm256_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            9, 10, 11, 12,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtsepi32_epi8() {
        let a = _mm_set_epi32(13, 14, 15, 16);
        let r = _mm_cvtsepi32_epi8(a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtsepi32_epi8() {
        let a = _mm_set_epi32(13, 14, 15, 16);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtsepi32_epi8() {
        let a = _mm_set_epi32(13, 14, 15, 16);
        let r = _mm_maskz_cvtsepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            13, 14, 15, 16,
        );
        assert_eq_m128i(r, e);
    }

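    // The cvtusepi32 family saturates with the source treated as unsigned
    // (vpmovusdw): i32::MIN reinterprets as 0x8000_0000, which exceeds
    // u16::MAX and clamps to 0xFFFF, read back as -1 by the signed helpers.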
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtusepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_cvtusepi32_epi16(a);
        let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let src = _mm256_set1_epi16(-1);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
        let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtusepi32_epi16() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
        let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtusepi32_epi16() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_cvtusepi32_epi16(a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_epi16() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set1_epi16(0);
        let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtusepi32_epi16() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
        let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtusepi32_epi16() {
        let a = _mm_set_epi32(5, 6, 7, 8);
        let r = _mm_cvtusepi32_epi16(a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_epi16() {
        let a = _mm_set_epi32(5, 6, 7, 8);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtusepi32_epi16() {
        let a = _mm_set_epi32(5, 6, 7, 8);
        let r = _mm_maskz_cvtusepi32_epi16(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
        let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtusepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_cvtusepi32_epi8(a);
        let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtusepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let src = _mm_set1_epi8(-1);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
        let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtusepi32_epi8() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            0, 1, 2, 3,
            4, 5, 6, 7,
            8, 9, 10, 11,
            12, 13, i32::MIN, i32::MIN,
        );
        let r = _mm512_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_cvtusepi32_epi8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let r = _mm256_cvtusepi32_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtusepi32_epi8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let src = _mm_set1_epi8(0);
        let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvtusepi32_epi8() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
        let r = _mm256_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_cvtusepi32_epi8() {
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let r = _mm_cvtusepi32_epi8(a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvtusepi32_epi8() {
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let src = _mm_set1_epi8(0);
        let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvtusepi32_epi8() {
        let a = _mm_set_epi32(5, 6, 7, i32::MAX);
        let r = _mm_maskz_cvtusepi32_epi8(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

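    // The cvt_round* tests exercise explicit rounding control:
    // _MM_FROUND_TO_NEAREST_INT rounds halfway cases to even (9.5 -> 10,
    // -1.5 -> -2), while _MM_FROUND_TO_NEG_INF floors (9.5 -> 9, -1.5 -> -2).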
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_epi32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
        assert_eq_m512i(r, e);
        let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let src = _mm512_set1_epi32(0);
        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512i(r, src);
        let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_epu32() {
        let a = _mm512_setr_ps(
            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
        );
        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundepi32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundepi32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundepi32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        let e = _mm512_setr_ps(
            0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

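    // -2 reinterpreted as u32 is 4_294_967_294, whose nearest f32 is
    // 2^32 = 4_294_967_296.0; the literal 4294967300. below denotes that same
    // f32 value, since f32 cannot distinguish the two near 2^32.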
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundepu32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            8., 10., 10., 12.,
            12., 14., 14., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundepu32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let src = _mm512_set1_ps(0.);
        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src, 0, a,
        );
        assert_eq_m512(r, src);
        let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            src,
            0b00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundepu32_ps() {
        let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a,
        );
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b00000000_11111111,
            a,
        );
        #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 4294967300., 2., 4294967300.,
            4., 4294967300., 6., 4294967300.,
            0., 0., 0., 0.,
            0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

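    // 0x3C00 is the IEEE 754 half-precision encoding of 1.0, so four packed
    // f16 ones per 64-bit lane give 0x3C00_3C00_3C00_3C00 =
    // 4323521613979991040.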
    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvt_roundps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
        let e = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvt_roundps_ph() {
        let a = _mm512_set1_ps(1.);
        let src = _mm256_set1_epi16(0);
        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvt_roundps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvt_roundps_ph() {
        let a = _mm256_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_cvt_roundps_ph() {
        let a = _mm256_set1_ps(1.);
        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_cvt_roundps_ph() {
        let a = _mm_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_cvt_roundps_ph() {
        let a = _mm_set1_ps(1.);
        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
        let e = _mm_setr_epi64x(4323521613979991040, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_cvtps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
        let e = _mm256_setr_epi64x(
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
            4323521613979991040,
        );
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_cvtps_ph() {
        let a = _mm512_set1_ps(1.);
        let src = _mm256_set1_epi16(0);
        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_cvtps_ph() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
        let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_cvtps_ph() {
        let a = _mm256_set1_ps(1.);
        let src = _mm_set1_epi16(0);
        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
51036        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51037        assert_eq_m128i(r, e);
51038    }
51039
51040    #[simd_test(enable = "avx512f,avx512vl")]
51041    fn test_mm256_maskz_cvtps_ph() {
51042        let a = _mm256_set1_ps(1.);
51043        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
51044        assert_eq_m128i(r, _mm_setzero_si128());
51045        let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
51046        let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51047        assert_eq_m128i(r, e);
51048    }
51049
51050    #[simd_test(enable = "avx512f,avx512vl")]
51051    fn test_mm_mask_cvtps_ph() {
51052        let a = _mm_set1_ps(1.);
51053        let src = _mm_set1_epi16(0);
51054        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
51055        assert_eq_m128i(r, src);
51056        let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
51057        let e = _mm_setr_epi64x(4323521613979991040, 0);
51058        assert_eq_m128i(r, e);
51059    }
51060
51061    #[simd_test(enable = "avx512f,avx512vl")]
51062    fn test_mm_maskz_cvtps_ph() {
51063        let a = _mm_set1_ps(1.);
51064        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
51065        assert_eq_m128i(r, _mm_setzero_si128());
51066        let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
51067        let e = _mm_setr_epi64x(4323521613979991040, 0);
51068        assert_eq_m128i(r, e);
51069    }
51070
51071    #[simd_test(enable = "avx512f")]
51072    fn test_mm512_cvt_roundph_ps() {
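             // 4323521613979991040 == 0x3C00_3C00_3C00_3C00: four f16 1.0 values per 64-bit lane.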
51073        let a = _mm256_setr_epi64x(
51074            4323521613979991040,
51075            4323521613979991040,
51076            4323521613979991040,
51077            4323521613979991040,
51078        );
51079        let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
51080        let e = _mm512_set1_ps(1.);
51081        assert_eq_m512(r, e);
51082    }
51083
51084    #[simd_test(enable = "avx512f")]
51085    fn test_mm512_mask_cvt_roundph_ps() {
51086        let a = _mm256_setr_epi64x(
51087            4323521613979991040,
51088            4323521613979991040,
51089            4323521613979991040,
51090            4323521613979991040,
51091        );
51092        let src = _mm512_set1_ps(0.);
51093        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
51094        assert_eq_m512(r, src);
51095        let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
51096        let e = _mm512_setr_ps(
51097            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
51098        );
51099        assert_eq_m512(r, e);
51100    }
51101
51102    #[simd_test(enable = "avx512f")]
51103    fn test_mm512_maskz_cvt_roundph_ps() {
51104        let a = _mm256_setr_epi64x(
51105            4323521613979991040,
51106            4323521613979991040,
51107            4323521613979991040,
51108            4323521613979991040,
51109        );
51110        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
51111        assert_eq_m512(r, _mm512_setzero_ps());
51112        let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
51113        let e = _mm512_setr_ps(
51114            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
51115        );
51116        assert_eq_m512(r, e);
51117    }
51118
51119    #[simd_test(enable = "avx512f")]
51120    fn test_mm512_cvtph_ps() {
51121        let a = _mm256_setr_epi64x(
51122            4323521613979991040,
51123            4323521613979991040,
51124            4323521613979991040,
51125            4323521613979991040,
51126        );
51127        let r = _mm512_cvtph_ps(a);
51128        let e = _mm512_set1_ps(1.);
51129        assert_eq_m512(r, e);
51130    }
51131
51132    #[simd_test(enable = "avx512f")]
51133    fn test_mm512_mask_cvtph_ps() {
51134        let a = _mm256_setr_epi64x(
51135            4323521613979991040,
51136            4323521613979991040,
51137            4323521613979991040,
51138            4323521613979991040,
51139        );
51140        let src = _mm512_set1_ps(0.);
51141        let r = _mm512_mask_cvtph_ps(src, 0, a);
51142        assert_eq_m512(r, src);
51143        let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
51144        let e = _mm512_setr_ps(
51145            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
51146        );
51147        assert_eq_m512(r, e);
51148    }
51149
51150    #[simd_test(enable = "avx512f")]
51151    fn test_mm512_maskz_cvtph_ps() {
51152        let a = _mm256_setr_epi64x(
51153            4323521613979991040,
51154            4323521613979991040,
51155            4323521613979991040,
51156            4323521613979991040,
51157        );
51158        let r = _mm512_maskz_cvtph_ps(0, a);
51159        assert_eq_m512(r, _mm512_setzero_ps());
51160        let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
51161        let e = _mm512_setr_ps(
51162            1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
51163        );
51164        assert_eq_m512(r, e);
51165    }
51166
51167    #[simd_test(enable = "avx512f,avx512vl")]
51168    fn test_mm256_mask_cvtph_ps() {
51169        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51170        let src = _mm256_set1_ps(0.);
51171        let r = _mm256_mask_cvtph_ps(src, 0, a);
51172        assert_eq_m256(r, src);
51173        let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
51174        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
51175        assert_eq_m256(r, e);
51176    }
51177
51178    #[simd_test(enable = "avx512f,avx512vl")]
51179    fn test_mm256_maskz_cvtph_ps() {
51180        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51181        let r = _mm256_maskz_cvtph_ps(0, a);
51182        assert_eq_m256(r, _mm256_setzero_ps());
51183        let r = _mm256_maskz_cvtph_ps(0b11111111, a);
51184        let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
51185        assert_eq_m256(r, e);
51186    }
51187
51188    #[simd_test(enable = "avx512f,avx512vl")]
51189    fn test_mm_mask_cvtph_ps() {
51190        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51191        let src = _mm_set1_ps(0.);
51192        let r = _mm_mask_cvtph_ps(src, 0, a);
51193        assert_eq_m128(r, src);
51194        let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
51195        let e = _mm_setr_ps(1., 1., 1., 1.);
51196        assert_eq_m128(r, e);
51197    }
51198
51199    #[simd_test(enable = "avx512f,avx512vl")]
51200    fn test_mm_maskz_cvtph_ps() {
51201        let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
51202        let r = _mm_maskz_cvtph_ps(0, a);
51203        assert_eq_m128(r, _mm_setzero_ps());
51204        let r = _mm_maskz_cvtph_ps(0b00001111, a);
51205        let e = _mm_setr_ps(1., 1., 1., 1.);
51206        assert_eq_m128(r, e);
51207    }
51208
51209    #[simd_test(enable = "avx512f")]
51210    fn test_mm512_cvtt_roundps_epi32() {
51211        let a = _mm512_setr_ps(
51212            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51213        );
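             // Truncation rounds toward zero: -1.5 -> -1, 9.5 -> 9.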
51214        let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
51215        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
51216        assert_eq_m512i(r, e);
51217    }
51218
51219    #[simd_test(enable = "avx512f")]
51220    fn test_mm512_mask_cvtt_roundps_epi32() {
51221        let a = _mm512_setr_ps(
51222            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51223        );
51224        let src = _mm512_set1_epi32(0);
51225        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
51226        assert_eq_m512i(r, src);
51227        let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
51228        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
51229        assert_eq_m512i(r, e);
51230    }
51231
51232    #[simd_test(enable = "avx512f")]
51233    fn test_mm512_maskz_cvtt_roundps_epi32() {
51234        let a = _mm512_setr_ps(
51235            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51236        );
51237        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
51238        assert_eq_m512i(r, _mm512_setzero_si512());
51239        let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
51240        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
51241        assert_eq_m512i(r, e);
51242    }
51243
51244    #[simd_test(enable = "avx512f")]
51245    fn test_mm512_cvtt_roundps_epu32() {
51246        let a = _mm512_setr_ps(
51247            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51248        );
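             // Negative inputs are out of range for an unsigned conversion, so the
             // result is u32::MAX (0xFFFFFFFF), which prints as -1 through i32 lanes.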
51249        let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
51250        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
51251        assert_eq_m512i(r, e);
51252    }
51253
51254    #[simd_test(enable = "avx512f")]
51255    fn test_mm512_mask_cvtt_roundps_epu32() {
51256        let a = _mm512_setr_ps(
51257            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51258        );
51259        let src = _mm512_set1_epi32(0);
51260        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
51261        assert_eq_m512i(r, src);
51262        let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
51263        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
51264        assert_eq_m512i(r, e);
51265    }
51266
51267    #[simd_test(enable = "avx512f")]
51268    fn test_mm512_maskz_cvtt_roundps_epu32() {
51269        let a = _mm512_setr_ps(
51270            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51271        );
51272        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
51273        assert_eq_m512i(r, _mm512_setzero_si512());
51274        let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
51275        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
51276        assert_eq_m512i(r, e);
51277    }
51278
51279    #[simd_test(enable = "avx512f")]
51280    fn test_mm512_cvttps_epi32() {
51281        let a = _mm512_setr_ps(
51282            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51283        );
51284        let r = _mm512_cvttps_epi32(a);
51285        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
51286        assert_eq_m512i(r, e);
51287    }
51288
51289    #[simd_test(enable = "avx512f")]
51290    fn test_mm512_mask_cvttps_epi32() {
51291        let a = _mm512_setr_ps(
51292            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51293        );
51294        let src = _mm512_set1_epi32(0);
51295        let r = _mm512_mask_cvttps_epi32(src, 0, a);
51296        assert_eq_m512i(r, src);
51297        let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
51298        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
51299        assert_eq_m512i(r, e);
51300    }
51301
51302    #[simd_test(enable = "avx512f")]
51303    fn test_mm512_maskz_cvttps_epi32() {
51304        let a = _mm512_setr_ps(
51305            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51306        );
51307        let r = _mm512_maskz_cvttps_epi32(0, a);
51308        assert_eq_m512i(r, _mm512_setzero_si512());
51309        let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
51310        let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
51311        assert_eq_m512i(r, e);
51312    }
51313
51314    #[simd_test(enable = "avx512f,avx512vl")]
51315    fn test_mm256_mask_cvttps_epi32() {
51316        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51317        let src = _mm256_set1_epi32(0);
51318        let r = _mm256_mask_cvttps_epi32(src, 0, a);
51319        assert_eq_m256i(r, src);
51320        let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
51321        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51322        assert_eq_m256i(r, e);
51323    }
51324
51325    #[simd_test(enable = "avx512f,avx512vl")]
51326    fn test_mm256_maskz_cvttps_epi32() {
51327        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51328        let r = _mm256_maskz_cvttps_epi32(0, a);
51329        assert_eq_m256i(r, _mm256_setzero_si256());
51330        let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
51331        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51332        assert_eq_m256i(r, e);
51333    }
51334
51335    #[simd_test(enable = "avx512f,avx512vl")]
51336    fn test_mm_mask_cvttps_epi32() {
51337        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51338        let src = _mm_set1_epi32(0);
51339        let r = _mm_mask_cvttps_epi32(src, 0, a);
51340        assert_eq_m128i(r, src);
51341        let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
51342        let e = _mm_set_epi32(12, 13, 14, 15);
51343        assert_eq_m128i(r, e);
51344    }
51345
51346    #[simd_test(enable = "avx512f,avx512vl")]
51347    fn test_mm_maskz_cvttps_epi32() {
51348        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51349        let r = _mm_maskz_cvttps_epi32(0, a);
51350        assert_eq_m128i(r, _mm_setzero_si128());
51351        let r = _mm_maskz_cvttps_epi32(0b00001111, a);
51352        let e = _mm_set_epi32(12, 13, 14, 15);
51353        assert_eq_m128i(r, e);
51354    }
51355
51356    #[simd_test(enable = "avx512f")]
51357    fn test_mm512_cvttps_epu32() {
51358        let a = _mm512_setr_ps(
51359            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51360        );
51361        let r = _mm512_cvttps_epu32(a);
51362        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
51363        assert_eq_m512i(r, e);
51364    }
51365
51366    #[simd_test(enable = "avx512f")]
51367    fn test_mm512_mask_cvttps_epu32() {
51368        let a = _mm512_setr_ps(
51369            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51370        );
51371        let src = _mm512_set1_epi32(0);
51372        let r = _mm512_mask_cvttps_epu32(src, 0, a);
51373        assert_eq_m512i(r, src);
51374        let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
51375        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
51376        assert_eq_m512i(r, e);
51377    }
51378
51379    #[simd_test(enable = "avx512f")]
51380    fn test_mm512_maskz_cvttps_epu32() {
51381        let a = _mm512_setr_ps(
51382            0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
51383        );
51384        let r = _mm512_maskz_cvttps_epu32(0, a);
51385        assert_eq_m512i(r, _mm512_setzero_si512());
51386        let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
51387        let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
51388        assert_eq_m512i(r, e);
51389    }
51390
51391    #[simd_test(enable = "avx512f,avx512vl")]
51392    fn test_mm256_cvttps_epu32() {
51393        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51394        let r = _mm256_cvttps_epu32(a);
51395        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51396        assert_eq_m256i(r, e);
51397    }
51398
51399    #[simd_test(enable = "avx512f,avx512vl")]
51400    fn test_mm256_mask_cvttps_epu32() {
51401        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51402        let src = _mm256_set1_epi32(0);
51403        let r = _mm256_mask_cvttps_epu32(src, 0, a);
51404        assert_eq_m256i(r, src);
51405        let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
51406        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51407        assert_eq_m256i(r, e);
51408    }
51409
51410    #[simd_test(enable = "avx512f,avx512vl")]
51411    fn test_mm256_maskz_cvttps_epu32() {
51412        let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
51413        let r = _mm256_maskz_cvttps_epu32(0, a);
51414        assert_eq_m256i(r, _mm256_setzero_si256());
51415        let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
51416        let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
51417        assert_eq_m256i(r, e);
51418    }
51419
51420    #[simd_test(enable = "avx512f,avx512vl")]
51421    fn test_mm_cvttps_epu32() {
51422        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51423        let r = _mm_cvttps_epu32(a);
51424        let e = _mm_set_epi32(12, 13, 14, 15);
51425        assert_eq_m128i(r, e);
51426    }
51427
51428    #[simd_test(enable = "avx512f,avx512vl")]
51429    fn test_mm_mask_cvttps_epu32() {
51430        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51431        let src = _mm_set1_epi32(0);
51432        let r = _mm_mask_cvttps_epu32(src, 0, a);
51433        assert_eq_m128i(r, src);
51434        let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
51435        let e = _mm_set_epi32(12, 13, 14, 15);
51436        assert_eq_m128i(r, e);
51437    }
51438
51439    #[simd_test(enable = "avx512f,avx512vl")]
51440    fn test_mm_maskz_cvttps_epu32() {
51441        let a = _mm_set_ps(12., 13.5, 14., 15.5);
51442        let r = _mm_maskz_cvttps_epu32(0, a);
51443        assert_eq_m128i(r, _mm_setzero_si128());
51444        let r = _mm_maskz_cvttps_epu32(0b00001111, a);
51445        let e = _mm_set_epi32(12, 13, 14, 15);
51446        assert_eq_m128i(r, e);
51447    }
51448
51449    #[simd_test(enable = "avx512f")]
51450    unsafe fn test_mm512_i32gather_ps() {
51451        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
51452        // A scale of 4 matches the 4-byte element size: indices count elements (dword addressing)
51453        #[rustfmt::skip]
51454        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51455                                      120, 128, 136, 144, 152, 160, 168, 176);
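             // Each lane i loads arr[index[i]]: addr = base + index[i] * SCALE.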
51456        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr());
51457        #[rustfmt::skip]
51458        assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
51459                                         120., 128., 136., 144., 152., 160., 168., 176.));
51460    }
51461
51462    #[simd_test(enable = "avx512f")]
51463    unsafe fn test_mm512_mask_i32gather_ps() {
51464        let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
51465        let src = _mm512_set1_ps(2.);
51466        let mask = 0b10101010_10101010;
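             // Odd mask bits gather from arr; even lanes keep src (2.).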
51467        #[rustfmt::skip]
51468        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51469                                      120, 128, 136, 144, 152, 160, 168, 176);
51470        // A scale of 4 matches the 4-byte element size: indices count elements (dword addressing)
51471        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr());
51472        #[rustfmt::skip]
51473        assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
51474                                         2., 128., 2., 144., 2., 160., 2., 176.));
51475    }
51476
51477    #[simd_test(enable = "avx512f")]
51478    unsafe fn test_mm512_i32gather_epi32() {
51479        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
51480        // A scale of 4 matches the 4-byte element size: indices count elements (dword addressing)
51481        #[rustfmt::skip]
51482        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51483                                      120, 128, 136, 144, 152, 160, 168, 176);
51484        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr());
51485        #[rustfmt::skip]
51486        assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51487                                             120, 128, 136, 144, 152, 160, 168, 176));
51488    }
51489
51490    #[simd_test(enable = "avx512f")]
51491    unsafe fn test_mm512_mask_i32gather_epi32() {
51492        let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
51493        let src = _mm512_set1_epi32(2);
51494        let mask = 0b10101010_10101010;
51495        let index = _mm512_setr_epi32(
51496            0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
51497        );
51498        // A scale of 4 matches the 4-byte element size: indices count elements (dword addressing)
51499        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr());
51500        assert_eq_m512i(
51501            r,
51502            _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
51503        );
51504    }
51505
51506    #[simd_test(enable = "avx512f")]
51507    unsafe fn test_mm512_i32scatter_ps() {
51508        let mut arr = [0f32; 256];
51509        #[rustfmt::skip]
51510        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51511                                      128, 144, 160, 176, 192, 208, 224, 240);
51512        let src = _mm512_setr_ps(
51513            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
51514        );
51515        // A scale of 4 matches the 4-byte element size: indices count elements (dword addressing)
51516        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
51517        let mut expected = [0f32; 256];
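             // Lane i stored src[i] = i + 1 at arr[index[i]] = arr[16 * i].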
51518        for i in 0..16 {
51519            expected[i * 16] = (i + 1) as f32;
51520        }
51521        assert_eq!(&arr[..], &expected[..]);
51522    }
51523
51524    #[simd_test(enable = "avx512f")]
51525    unsafe fn test_mm512_mask_i32scatter_ps() {
51526        let mut arr = [0f32; 256];
51527        let mask = 0b10101010_10101010;
51528        #[rustfmt::skip]
51529        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51530                                      128, 144, 160, 176, 192, 208, 224, 240);
51531        let src = _mm512_setr_ps(
51532            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
51533        );
51534        // A scale of 4 matches the 4-byte element size: indices count elements (dword addressing)
51535        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
51536        let mut expected = [0f32; 256];
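             // Only odd lanes store (mask 0b1010...): lane 2*i + 1 writes the value
             // 2*i + 2 at arr[16 * (2*i + 1)] = arr[32*i + 16].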
51537        for i in 0..8 {
51538            expected[i * 32 + 16] = 2. * (i + 1) as f32;
51539        }
51540        assert_eq!(&arr[..], &expected[..]);
51541    }
51542
51543    #[simd_test(enable = "avx512f")]
51544    unsafe fn test_mm512_i32scatter_epi32() {
51545        let mut arr = [0i32; 256];
51546        #[rustfmt::skip]
51548        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51549                                      128, 144, 160, 176, 192, 208, 224, 240);
51550        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51551        // A scale of 4 matches the 4-byte element size: indices count elements (dword addressing)
51552        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
51553        let mut expected = [0i32; 256];
51554        for i in 0..16 {
51555            expected[i * 16] = (i + 1) as i32;
51556        }
51557        assert_eq!(&arr[..], &expected[..]);
51558    }
51559
51560    #[simd_test(enable = "avx512f")]
51561    unsafe fn test_mm512_mask_i32scatter_epi32() {
51562        let mut arr = [0i32; 256];
51563        let mask = 0b10101010_10101010;
51564        #[rustfmt::skip]
51565        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
51566                                      128, 144, 160, 176, 192, 208, 224, 240);
51567        let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51568        // A scale of 4 matches the 4-byte element size: indices count elements (dword addressing)
51569        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
51570        let mut expected = [0i32; 256];
51571        for i in 0..8 {
51572            expected[i * 32 + 16] = 2 * (i + 1) as i32;
51573        }
51574        assert_eq!(&arr[..], &expected[..]);
51575    }
51576
51577    #[simd_test(enable = "avx512f")]
51578    fn test_mm512_cmplt_ps_mask() {
51579        #[rustfmt::skip]
51580        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51581                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51582        let b = _mm512_set1_ps(-1.);
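             // LT is an ordered compare: NaN lanes yield false, so only the
             // f32::MIN and -100. lanes are below -1.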
51583        let m = _mm512_cmplt_ps_mask(a, b);
51584        assert_eq!(m, 0b00000101_00000101);
51585    }
51586
51587    #[simd_test(enable = "avx512f")]
51588    fn test_mm512_mask_cmplt_ps_mask() {
51589        #[rustfmt::skip]
51590        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51591                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51592        let b = _mm512_set1_ps(-1.);
51593        let mask = 0b01100110_01100110;
51594        let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
51595        assert_eq!(r, 0b00000100_00000100);
51596    }
51597
51598    #[simd_test(enable = "avx512f")]
51599    fn test_mm512_cmpnlt_ps_mask() {
51600        #[rustfmt::skip]
51601        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51602                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51603        let b = _mm512_set1_ps(-1.);
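             // NLT is the unordered complement of LT, so NaN lanes yield true.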
51604        assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
51605    }
51606
51607    #[simd_test(enable = "avx512f")]
51608    fn test_mm512_mask_cmpnlt_ps_mask() {
51609        #[rustfmt::skip]
51610        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51611                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51612        let b = _mm512_set1_ps(-1.);
51613        let mask = 0b01111010_01111010;
51614        assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
51615    }
51616
51617    #[simd_test(enable = "avx512f")]
51618    fn test_mm512_cmpnle_ps_mask() {
51619        #[rustfmt::skip]
51620        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51621                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51622        let b = _mm512_set1_ps(-1.);
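             // NLE(b, a) is true where b > a or either operand is NaN.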
51623        let m = _mm512_cmpnle_ps_mask(b, a);
51624        assert_eq!(m, 0b00001101_00001101);
51625    }
51626
51627    #[simd_test(enable = "avx512f")]
51628    fn test_mm512_mask_cmpnle_ps_mask() {
51629        #[rustfmt::skip]
51630        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51631                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51632        let b = _mm512_set1_ps(-1.);
51633        let mask = 0b01100110_01100110;
51634        let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
51635        assert_eq!(r, 0b00000100_00000100);
51636    }
51637
51638    #[simd_test(enable = "avx512f")]
51639    fn test_mm512_cmple_ps_mask() {
51640        #[rustfmt::skip]
51641        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51642                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51643        let b = _mm512_set1_ps(-1.);
51644        assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
51645    }
51646
51647    #[simd_test(enable = "avx512f")]
51648    fn test_mm512_mask_cmple_ps_mask() {
51649        #[rustfmt::skip]
51650        let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
51651                              0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
51652        let b = _mm512_set1_ps(-1.);
51653        let mask = 0b01111010_01111010;
51654        assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
51655    }
51656
51657    #[simd_test(enable = "avx512f")]
51658    fn test_mm512_cmpeq_ps_mask() {
51659        #[rustfmt::skip]
51660        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
51661                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
51662        #[rustfmt::skip]
51663        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
51664                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
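             // EQ is an ordered compare: the NaN-vs-NaN lanes compare false.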
51665        let m = _mm512_cmpeq_ps_mask(b, a);
51666        assert_eq!(m, 0b11001101_11001101);
51667    }
51668
51669    #[simd_test(enable = "avx512f")]
51670    fn test_mm512_mask_cmpeq_ps_mask() {
51671        #[rustfmt::skip]
51672        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
51673                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
51674        #[rustfmt::skip]
51675        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
51676                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
51677        let mask = 0b01111010_01111010;
51678        let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
51679        assert_eq!(r, 0b01001000_01001000);
51680    }
51681
51682    #[simd_test(enable = "avx512f")]
51683    fn test_mm512_cmpneq_ps_mask() {
51684        #[rustfmt::skip]
51685        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
51686                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
51687        #[rustfmt::skip]
51688        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
51689                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
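             // NEQ is an unordered compare: the NaN-vs-NaN lanes compare true.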
51690        let m = _mm512_cmpneq_ps_mask(b, a);
51691        assert_eq!(m, 0b00110010_00110010);
51692    }
51693
51694    #[simd_test(enable = "avx512f")]
51695    fn test_mm512_mask_cmpneq_ps_mask() {
51696        #[rustfmt::skip]
51697        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
51698                              0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
51699        #[rustfmt::skip]
51700        let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
51701                              0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
51702        let mask = 0b01111010_01111010;
51703        let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
51704        assert_eq!(r, 0b00110010_00110010);
51705    }
51706
51707    #[simd_test(enable = "avx512f")]
51708    fn test_mm512_cmp_ps_mask() {
51709        #[rustfmt::skip]
51710        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
51711                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
51712        let b = _mm512_set1_ps(-1.);
51713        let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
51714        assert_eq!(m, 0b00000101_00000101);
51715    }
51716
51717    #[simd_test(enable = "avx512f")]
51718    fn test_mm512_mask_cmp_ps_mask() {
51719        #[rustfmt::skip]
51720        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
51721                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
51722        let b = _mm512_set1_ps(-1.);
51723        let mask = 0b01100110_01100110;
51724        let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
51725        assert_eq!(r, 0b00000100_00000100);
51726    }
51727
51728    #[simd_test(enable = "avx512f,avx512vl")]
51729    fn test_mm256_cmp_ps_mask() {
51730        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
51731        let b = _mm256_set1_ps(-1.);
51732        let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
51733        assert_eq!(m, 0b00000101);
51734    }
51735
51736    #[simd_test(enable = "avx512f,avx512vl")]
51737    fn test_mm256_mask_cmp_ps_mask() {
51738        let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
51739        let b = _mm256_set1_ps(-1.);
51740        let mask = 0b01100110;
51741        let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
51742        assert_eq!(r, 0b00000100);
51743    }
51744
51745    #[simd_test(enable = "avx512f,avx512vl")]
51746    fn test_mm_cmp_ps_mask() {
51747        let a = _mm_set_ps(0., 1., -1., 13.);
51748        let b = _mm_set1_ps(1.);
51749        let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
51750        assert_eq!(m, 0b00001010);
51751    }
51752
51753    #[simd_test(enable = "avx512f,avx512vl")]
51754    fn test_mm_mask_cmp_ps_mask() {
51755        let a = _mm_set_ps(0., 1., -1., 13.);
51756        let b = _mm_set1_ps(1.);
51757        let mask = 0b11111111;
51758        let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
51759        assert_eq!(r, 0b00001010);
51760    }
51761
51762    #[simd_test(enable = "avx512f")]
51763    fn test_mm512_cmp_round_ps_mask() {
51764        #[rustfmt::skip]
51765        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
51766                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
51767        let b = _mm512_set1_ps(-1.);
51768        let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
51769        assert_eq!(m, 0b00000101_00000101);
51770    }
51771
51772    #[simd_test(enable = "avx512f")]
51773    fn test_mm512_mask_cmp_round_ps_mask() {
51774        #[rustfmt::skip]
51775        let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
51776                              0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
51777        let b = _mm512_set1_ps(-1.);
51778        let mask = 0b01100110_01100110;
51779        let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
51780        assert_eq!(r, 0b00000100_00000100);
51781    }
51782
51783    #[simd_test(enable = "avx512f")]
51784    fn test_mm512_cmpord_ps_mask() {
51785        #[rustfmt::skip]
51786        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
51787                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
51788        #[rustfmt::skip]
51789        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
51790                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
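             // ORD reports true only where neither operand is NaN.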
51791        let m = _mm512_cmpord_ps_mask(a, b);
51792        assert_eq!(m, 0b00000101_00000101);
51793    }
51794
51795    #[simd_test(enable = "avx512f")]
51796    fn test_mm512_mask_cmpord_ps_mask() {
51797        #[rustfmt::skip]
51798        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
51799                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
51800        #[rustfmt::skip]
51801        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
51802                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
51803        let mask = 0b11000011_11000011;
51804        let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
51805        assert_eq!(m, 0b00000001_00000001);
51806    }
51807
51808    #[simd_test(enable = "avx512f")]
51809    fn test_mm512_cmpunord_ps_mask() {
51810        #[rustfmt::skip]
51811        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
51812                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
51813        #[rustfmt::skip]
51814        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
51815                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
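             // UNORD reports true where at least one operand is NaN.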
51816        let m = _mm512_cmpunord_ps_mask(a, b);
51818        assert_eq!(m, 0b11111010_11111010);
51819    }
51820
51821    #[simd_test(enable = "avx512f")]
51822    fn test_mm512_mask_cmpunord_ps_mask() {
51823        #[rustfmt::skip]
51824        let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
51825                              f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
51826        #[rustfmt::skip]
51827        let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
51828                              f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
51829        let mask = 0b00001111_00001111;
51830        let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
51831        assert_eq!(m, 0b00001010_00001010);
51832    }
51833
51834    #[simd_test(enable = "avx512f")]
51835    fn test_mm_cmp_ss_mask() {
51836        let a = _mm_setr_ps(2., 1., 1., 1.);
51837        let b = _mm_setr_ps(1., 2., 2., 2.);
51838        let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
51839        assert_eq!(m, 1);
51840    }
51841
51842    #[simd_test(enable = "avx512f")]
51843    fn test_mm_mask_cmp_ss_mask() {
51844        let a = _mm_setr_ps(2., 1., 1., 1.);
51845        let b = _mm_setr_ps(1., 2., 2., 2.);
51846        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
51847        assert_eq!(m, 0);
51848        let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
51849        assert_eq!(m, 1);
51850    }
51851
51852    #[simd_test(enable = "avx512f")]
51853    fn test_mm_cmp_round_ss_mask() {
51854        let a = _mm_setr_ps(2., 1., 1., 1.);
51855        let b = _mm_setr_ps(1., 2., 2., 2.);
51856        let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
51857        assert_eq!(m, 1);
51858    }
51859
51860    #[simd_test(enable = "avx512f")]
51861    fn test_mm_mask_cmp_round_ss_mask() {
51862        let a = _mm_setr_ps(2., 1., 1., 1.);
51863        let b = _mm_setr_ps(1., 2., 2., 2.);
51864        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
51865        assert_eq!(m, 0);
51866        let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
51867        assert_eq!(m, 1);
51868    }
51869
51870    #[simd_test(enable = "avx512f")]
51871    fn test_mm_cmp_sd_mask() {
51872        let a = _mm_setr_pd(2., 1.);
51873        let b = _mm_setr_pd(1., 2.);
51874        let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
51875        assert_eq!(m, 1);
51876    }
51877
51878    #[simd_test(enable = "avx512f")]
51879    fn test_mm_mask_cmp_sd_mask() {
51880        let a = _mm_setr_pd(2., 1.);
51881        let b = _mm_setr_pd(1., 2.);
51882        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
51883        assert_eq!(m, 0);
51884        let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
51885        assert_eq!(m, 1);
51886    }
51887
51888    #[simd_test(enable = "avx512f")]
51889    fn test_mm_cmp_round_sd_mask() {
51890        let a = _mm_setr_pd(2., 1.);
51891        let b = _mm_setr_pd(1., 2.);
51892        let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
51893        assert_eq!(m, 1);
51894    }
51895
51896    #[simd_test(enable = "avx512f")]
51897    fn test_mm_mask_cmp_round_sd_mask() {
51898        let a = _mm_setr_pd(2., 1.);
51899        let b = _mm_setr_pd(1., 2.);
51900        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
51901        assert_eq!(m, 0);
51902        let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
51903        assert_eq!(m, 1);
51904    }
51905
51906    #[simd_test(enable = "avx512f")]
51907    const fn test_mm512_cmplt_epu32_mask() {
51908        #[rustfmt::skip]
51909        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
51910                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
51911        let b = _mm512_set1_epi32(-1);
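             // Viewed as u32, b is u32::MAX, so every lane of a except the
             // -1/u32::MAX lanes compares below it.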
51912        let m = _mm512_cmplt_epu32_mask(a, b);
51913        assert_eq!(m, 0b11001111_11001111);
51914    }
51915
51916    #[simd_test(enable = "avx512f")]
51917    const fn test_mm512_mask_cmplt_epu32_mask() {
51918        #[rustfmt::skip]
51919        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
51920                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
51921        let b = _mm512_set1_epi32(-1);
51922        let mask = 0b01111010_01111010;
51923        let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
51924        assert_eq!(r, 0b01001010_01001010);
51925    }
51926
51927    #[simd_test(enable = "avx512f,avx512vl")]
51928    const fn test_mm256_cmplt_epu32_mask() {
51929        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
51930        let b = _mm256_set1_epi32(1);
51931        let r = _mm256_cmplt_epu32_mask(a, b);
51932        assert_eq!(r, 0b10000000);
51933    }
51934
51935    #[simd_test(enable = "avx512f,avx512vl")]
51936    const fn test_mm256_mask_cmplt_epu32_mask() {
51937        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
51938        let b = _mm256_set1_epi32(1);
51939        let mask = 0b11111111;
51940        let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
51941        assert_eq!(r, 0b10000000);
51942    }
51943
51944    #[simd_test(enable = "avx512f,avx512vl")]
51945    const fn test_mm_cmplt_epu32_mask() {
51946        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
51947        let b = _mm_set1_epi32(1);
51948        let r = _mm_cmplt_epu32_mask(a, b);
51949        assert_eq!(r, 0b00001000);
51950    }
51951
51952    #[simd_test(enable = "avx512f,avx512vl")]
51953    const fn test_mm_mask_cmplt_epu32_mask() {
51954        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
51955        let b = _mm_set1_epi32(1);
51956        let mask = 0b11111111;
51957        let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
51958        assert_eq!(r, 0b00001000);
51959    }
51960
51961    #[simd_test(enable = "avx512f")]
51962    const fn test_mm512_cmpgt_epu32_mask() {
51963        #[rustfmt::skip]
51964        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
51965                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
51966        let b = _mm512_set1_epi32(-1);
51967        let m = _mm512_cmpgt_epu32_mask(b, a);
51968        assert_eq!(m, 0b11001111_11001111);
51969    }
51970
51971    #[simd_test(enable = "avx512f")]
51972    const fn test_mm512_mask_cmpgt_epu32_mask() {
51973        #[rustfmt::skip]
51974        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
51975                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
51976        let b = _mm512_set1_epi32(-1);
51977        let mask = 0b01111010_01111010;
51978        let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
51979        assert_eq!(r, 0b01001010_01001010);
51980    }
51981
51982    #[simd_test(enable = "avx512f,avx512vl")]
51983    const fn test_mm256_cmpgt_epu32_mask() {
51984        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
51985        let b = _mm256_set1_epi32(1);
51986        let r = _mm256_cmpgt_epu32_mask(a, b);
51987        assert_eq!(r, 0b00111111);
51988    }
51989
51990    #[simd_test(enable = "avx512f,avx512vl")]
51991    const fn test_mm256_mask_cmpgt_epu32_mask() {
51992        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
51993        let b = _mm256_set1_epi32(1);
51994        let mask = 0b11111111;
51995        let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
51996        assert_eq!(r, 0b00111111);
51997    }
51998
51999    #[simd_test(enable = "avx512f,avx512vl")]
52000    const fn test_mm_cmpgt_epu32_mask() {
52001        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
52002        let b = _mm_set1_epi32(1);
52003        let r = _mm_cmpgt_epu32_mask(a, b);
52004        assert_eq!(r, 0b00000011);
52005    }
52006
52007    #[simd_test(enable = "avx512f,avx512vl")]
52008    const fn test_mm_mask_cmpgt_epu32_mask() {
52009        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
52010        let b = _mm_set1_epi32(1);
52011        let mask = 0b11111111;
52012        let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
52013        assert_eq!(r, 0b00000011);
52014    }
52015
52016    #[simd_test(enable = "avx512f")]
52017    const fn test_mm512_cmple_epu32_mask() {
52018        #[rustfmt::skip]
52019        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52020                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52021        let b = _mm512_set1_epi32(-1);
52022        assert_eq!(
52023            _mm512_cmple_epu32_mask(a, b),
52024            !_mm512_cmpgt_epu32_mask(a, b)
52025        );
52026    }
52027
52028    #[simd_test(enable = "avx512f")]
52029    const fn test_mm512_mask_cmple_epu32_mask() {
52030        #[rustfmt::skip]
52031        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52032                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52033        let b = _mm512_set1_epi32(-1);
52034        let mask = 0b01111010_01111010;
52035        assert_eq!(
52036            _mm512_mask_cmple_epu32_mask(mask, a, b),
52037            0b01111010_01111010
52038        );
52039    }
52040
52041    #[simd_test(enable = "avx512f,avx512vl")]
52042    const fn test_mm256_cmple_epu32_mask() {
52043        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
52044        let b = _mm256_set1_epi32(1);
52045        let r = _mm256_cmple_epu32_mask(a, b);
52046        assert_eq!(r, 0b11000000);
52047    }
52048
52049    #[simd_test(enable = "avx512f,avx512vl")]
52050    const fn test_mm256_mask_cmple_epu32_mask() {
52051        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
52052        let b = _mm256_set1_epi32(1);
52053        let mask = 0b11111111;
52054        let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
52055        assert_eq!(r, 0b11000000);
52056    }
52057
52058    #[simd_test(enable = "avx512f,avx512vl")]
52059    const fn test_mm_cmple_epu32_mask() {
52060        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
52061        let b = _mm_set1_epi32(1);
52062        let r = _mm_cmple_epu32_mask(a, b);
52063        assert_eq!(r, 0b00001100);
52064    }
52065
52066    #[simd_test(enable = "avx512f,avx512vl")]
52067    const fn test_mm_mask_cmple_epu32_mask() {
52068        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
52069        let b = _mm_set1_epi32(1);
52070        let mask = 0b11111111;
52071        let r = _mm_mask_cmple_epu32_mask(mask, a, b);
52072        assert_eq!(r, 0b00001100);
52073    }
52074
52075    #[simd_test(enable = "avx512f")]
52076    const fn test_mm512_cmpge_epu32_mask() {
52077        #[rustfmt::skip]
52078        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52079                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52080        let b = _mm512_set1_epi32(-1);
52081        assert_eq!(
52082            _mm512_cmpge_epu32_mask(a, b),
52083            !_mm512_cmplt_epu32_mask(a, b)
52084        );
52085    }
52086
52087    #[simd_test(enable = "avx512f")]
52088    const fn test_mm512_mask_cmpge_epu32_mask() {
52089        #[rustfmt::skip]
52090        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52091                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52092        let b = _mm512_set1_epi32(-1);
52093        let mask = 0b01111010_01111010;
52094        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
52095    }
52096
52097    #[simd_test(enable = "avx512f,avx512vl")]
52098    const fn test_mm256_cmpge_epu32_mask() {
52099        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
52100        let b = _mm256_set1_epi32(1);
52101        let r = _mm256_cmpge_epu32_mask(a, b);
52102        assert_eq!(r, 0b01111111);
52103    }
52104
52105    #[simd_test(enable = "avx512f,avx512vl")]
52106    const fn test_mm256_mask_cmpge_epu32_mask() {
52107        let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
52108        let b = _mm256_set1_epi32(1);
52109        let mask = 0b11111111;
52110        let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
52111        assert_eq!(r, 0b01111111);
52112    }
52113
52114    #[simd_test(enable = "avx512f,avx512vl")]
52115    const fn test_mm_cmpge_epu32_mask() {
52116        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
52117        let b = _mm_set1_epi32(1);
52118        let r = _mm_cmpge_epu32_mask(a, b);
52119        assert_eq!(r, 0b00000111);
52120    }
52121
52122    #[simd_test(enable = "avx512f,avx512vl")]
52123    const fn test_mm_mask_cmpge_epu32_mask() {
52124        let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
52125        let b = _mm_set1_epi32(1);
52126        let mask = 0b11111111;
52127        let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
52128        assert_eq!(r, 0b00000111);
52129    }
52130
52131    #[simd_test(enable = "avx512f")]
52132    const fn test_mm512_cmpeq_epu32_mask() {
52133        #[rustfmt::skip]
52134        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52135                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52136        #[rustfmt::skip]
52137        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
52138                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52139        let m = _mm512_cmpeq_epu32_mask(b, a);
52140        assert_eq!(m, 0b11001111_11001111);
52141    }
52142
52143    #[simd_test(enable = "avx512f")]
52144    const fn test_mm512_mask_cmpeq_epu32_mask() {
52145        #[rustfmt::skip]
52146        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52147                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52148        #[rustfmt::skip]
52149        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
52150                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52151        let mask = 0b01111010_01111010;
52152        let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
52153        assert_eq!(r, 0b01001010_01001010);
52154    }
52155
52156    #[simd_test(enable = "avx512f,avx512vl")]
52157    const fn test_mm256_cmpeq_epu32_mask() {
52158        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52159        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52160        let m = _mm256_cmpeq_epu32_mask(b, a);
52161        assert_eq!(m, 0b11001111);
52162    }
52163
52164    #[simd_test(enable = "avx512f,avx512vl")]
52165    const fn test_mm256_mask_cmpeq_epu32_mask() {
52166        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52167        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52168        let mask = 0b01111010;
52169        let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
52170        assert_eq!(r, 0b01001010);
52171    }
52172
52173    #[simd_test(enable = "avx512f,avx512vl")]
52174    const fn test_mm_cmpeq_epu32_mask() {
52175        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
52176        let b = _mm_set_epi32(0, 1, 13, 42);
52177        let m = _mm_cmpeq_epu32_mask(b, a);
52178        assert_eq!(m, 0b00001100);
52179    }
52180
52181    #[simd_test(enable = "avx512f,avx512vl")]
52182    const fn test_mm_mask_cmpeq_epu32_mask() {
52183        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
52184        let b = _mm_set_epi32(0, 1, 13, 42);
52185        let mask = 0b11111111;
52186        let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
52187        assert_eq!(r, 0b00001100);
52188    }
52189
52190    #[simd_test(enable = "avx512f")]
52191    const fn test_mm512_cmpneq_epu32_mask() {
52192        #[rustfmt::skip]
52193        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52194                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52195        #[rustfmt::skip]
52196        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
52197                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52198        let m = _mm512_cmpneq_epu32_mask(b, a);
52199        assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
52200    }
52201
52202    #[simd_test(enable = "avx512f")]
52203    const fn test_mm512_mask_cmpneq_epu32_mask() {
52204        #[rustfmt::skip]
52205        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
52206                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
52207        #[rustfmt::skip]
52208        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
52209                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52210        let mask = 0b01111010_01111010;
52211        let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
52212        assert_eq!(r, 0b00110010_00110010);
52213    }
52214
52215    #[simd_test(enable = "avx512f,avx512vl")]
52216    const fn test_mm256_cmpneq_epu32_mask() {
52217        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
52218        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
52219        let r = _mm256_cmpneq_epu32_mask(b, a);
52220        assert_eq!(r, 0b00110000);
52221    }
52222
52223    #[simd_test(enable = "avx512f,avx512vl")]
52224    const fn test_mm256_mask_cmpneq_epu32_mask() {
52225        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
52226        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
52227        let mask = 0b11111111;
52228        let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
52229        assert_eq!(r, 0b00110000);
52230    }
52231
52232    #[simd_test(enable = "avx512f,avx512vl")]
52233    const fn test_mm_cmpneq_epu32_mask() {
52234        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
52235        let b = _mm_set_epi32(0, 1, 13, 42);
52236        let r = _mm_cmpneq_epu32_mask(b, a);
52237        assert_eq!(r, 0b00000011);
52238    }
52239
52240    #[simd_test(enable = "avx512f,avx512vl")]
52241    const fn test_mm_mask_cmpneq_epu32_mask() {
52242        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
52243        let b = _mm_set_epi32(0, 1, 13, 42);
52244        let mask = 0b11111111;
52245        let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
52246        assert_eq!(r, 0b00000011);
52247    }
52248
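    // `_mm512_cmp_epu32_mask::<_MM_CMPINT_LT>` is an unsigned per-lane
    // less-than, so a lane holding `-1` compares as `u32::MAX` here.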
52249    #[simd_test(enable = "avx512f")]
52250    const fn test_mm512_cmp_epu32_mask() {
52251        #[rustfmt::skip]
52252        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52253                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52254        let b = _mm512_set1_epi32(-1);
52255        let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
52256        assert_eq!(m, 0b11001111_11001111);
52257    }
52258
52259    #[simd_test(enable = "avx512f")]
52260    const fn test_mm512_mask_cmp_epu32_mask() {
52261        #[rustfmt::skip]
52262        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52263                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52264        let b = _mm512_set1_epi32(-1);
52265        let mask = 0b01111010_01111010;
52266        let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
52267        assert_eq!(r, 0b01001010_01001010);
52268    }
52269
52270    #[simd_test(enable = "avx512f,avx512vl")]
52271    const fn test_mm256_cmp_epu32_mask() {
52272        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52273        let b = _mm256_set1_epi32(-1);
52274        let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
52275        assert_eq!(m, 0b11001111);
52276    }
52277
52278    #[simd_test(enable = "avx512f,avx512vl")]
52279    const fn test_mm256_mask_cmp_epu32_mask() {
52280        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52281        let b = _mm256_set1_epi32(-1);
52282        let mask = 0b11111111;
52283        let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
52284        assert_eq!(r, 0b11001111);
52285    }
52286
52287    #[simd_test(enable = "avx512f,avx512vl")]
52288    const fn test_mm_cmp_epu32_mask() {
52289        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
52290        let b = _mm_set1_epi32(1);
52291        let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
52292        assert_eq!(m, 0b00001000);
52293    }
52294
52295    #[simd_test(enable = "avx512f,avx512vl")]
52296    const fn test_mm_mask_cmp_epu32_mask() {
52297        let a = _mm_set_epi32(0, 1, -1, i32::MAX);
52298        let b = _mm_set1_epi32(1);
52299        let mask = 0b11111111;
52300        let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
52301        assert_eq!(r, 0b00001000);
52302    }
52303
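    // The `epi32` comparisons below are signed: `i32::MIN` is the smallest
    // lane value and `-1` really is less than `0`.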
52304    #[simd_test(enable = "avx512f")]
52305    const fn test_mm512_cmplt_epi32_mask() {
52306        #[rustfmt::skip]
52307        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52308                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52309        let b = _mm512_set1_epi32(-1);
52310        let m = _mm512_cmplt_epi32_mask(a, b);
52311        assert_eq!(m, 0b00000101_00000101);
52312    }
52313
52314    #[simd_test(enable = "avx512f")]
52315    const fn test_mm512_mask_cmplt_epi32_mask() {
52316        #[rustfmt::skip]
52317        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52318                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52319        let b = _mm512_set1_epi32(-1);
52320        let mask = 0b01100110_01100110;
52321        let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
52322        assert_eq!(r, 0b00000100_00000100);
52323    }
52324
52325    #[simd_test(enable = "avx512f,avx512vl")]
52326    const fn test_mm256_cmplt_epi32_mask() {
52327        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
52328        let b = _mm256_set1_epi32(-1);
52329        let r = _mm256_cmplt_epi32_mask(a, b);
52330        assert_eq!(r, 0b00000101);
52331    }
52332
52333    #[simd_test(enable = "avx512f,avx512vl")]
52334    const fn test_mm256_mask_cmplt_epi32_mask() {
52335        let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
52336        let b = _mm256_set1_epi32(-1);
52337        let mask = 0b11111111;
52338        let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
52339        assert_eq!(r, 0b00000101);
52340    }
52341
52342    #[simd_test(enable = "avx512f,avx512vl")]
52343    const fn test_mm_cmplt_epi32_mask() {
52344        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
52345        let b = _mm_set1_epi32(-1);
52346        let r = _mm_cmplt_epi32_mask(a, b);
52347        assert_eq!(r, 0b00000101);
52348    }
52349
52350    #[simd_test(enable = "avx512f,avx512vl")]
52351    const fn test_mm_mask_cmplt_epi32_mask() {
52352        let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
52353        let b = _mm_set1_epi32(-1);
52354        let mask = 0b11111111;
52355        let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
52356        assert_eq!(r, 0b00000101);
52357    }
52358
52359    #[simd_test(enable = "avx512f")]
52360    const fn test_mm512_cmpgt_epi32_mask() {
52361        #[rustfmt::skip]
52362        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
52363                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52364        let b = _mm512_set1_epi32(-1);
52365        let m = _mm512_cmpgt_epi32_mask(b, a);
52366        assert_eq!(m, 0b00000101_00000101);
52367    }
52368
52369    #[simd_test(enable = "avx512f")]
52370    const fn test_mm512_mask_cmpgt_epi32_mask() {
52371        #[rustfmt::skip]
52372        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
52373                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52374        let b = _mm512_set1_epi32(-1);
52375        let mask = 0b01100110_01100110;
52376        let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
52377        assert_eq!(r, 0b00000100_00000100);
52378    }
52379
52380    #[simd_test(enable = "avx512f,avx512vl")]
52381    const fn test_mm256_cmpgt_epi32_mask() {
52382        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52383        let b = _mm256_set1_epi32(-1);
52384        let r = _mm256_cmpgt_epi32_mask(a, b);
52385        assert_eq!(r, 0b11011010);
52386    }
52387
52388    #[simd_test(enable = "avx512f,avx512vl")]
52389    const fn test_mm256_mask_cmpgt_epi32_mask() {
52390        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52391        let b = _mm256_set1_epi32(-1);
52392        let mask = 0b11111111;
52393        let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
52394        assert_eq!(r, 0b11011010);
52395    }
52396
52397    #[simd_test(enable = "avx512f,avx512vl")]
52398    const fn test_mm_cmpgt_epi32_mask() {
52399        let a = _mm_set_epi32(0, 1, -1, 13);
52400        let b = _mm_set1_epi32(-1);
52401        let r = _mm_cmpgt_epi32_mask(a, b);
52402        assert_eq!(r, 0b00001101);
52403    }
52404
52405    #[simd_test(enable = "avx512f,avx512vl")]
52406    const fn test_mm_mask_cmpgt_epi32_mask() {
52407        let a = _mm_set_epi32(0, 1, -1, 13);
52408        let b = _mm_set1_epi32(-1);
52409        let mask = 0b11111111;
52410        let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
52411        assert_eq!(r, 0b00001101);
52412    }
52413
52414    #[simd_test(enable = "avx512f")]
52415    const fn test_mm512_cmple_epi32_mask() {
52416        #[rustfmt::skip]
52417        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52418                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52419        let b = _mm512_set1_epi32(-1);
52420        assert_eq!(
52421            _mm512_cmple_epi32_mask(a, b),
52422            !_mm512_cmpgt_epi32_mask(a, b)
52423        )
52424    }
52425
52426    #[simd_test(enable = "avx512f")]
52427    const fn test_mm512_mask_cmple_epi32_mask() {
52428        #[rustfmt::skip]
52429        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52430                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52431        let b = _mm512_set1_epi32(-1);
52432        let mask = 0b01111010_01111010;
52433        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
52434    }
52435
52436    #[simd_test(enable = "avx512f,avx512vl")]
52437    const fn test_mm256_cmple_epi32_mask() {
52438        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
52439        let b = _mm256_set1_epi32(-1);
52440        let r = _mm256_cmple_epi32_mask(a, b);
52441        assert_eq!(r, 0b00100101)
52442    }
52443
52444    #[simd_test(enable = "avx512f,avx512vl")]
52445    const fn test_mm256_mask_cmple_epi32_mask() {
52446        let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
52447        let b = _mm256_set1_epi32(-1);
52448        let mask = 0b11111111;
52449        let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
52450        assert_eq!(r, 0b00100101)
52451    }
52452
52453    #[simd_test(enable = "avx512f,avx512vl")]
52454    const fn test_mm_cmple_epi32_mask() {
52455        let a = _mm_set_epi32(0, 1, -1, 200);
52456        let b = _mm_set1_epi32(-1);
52457        let r = _mm_cmple_epi32_mask(a, b);
52458        assert_eq!(r, 0b00000010)
52459    }
52460
52461    #[simd_test(enable = "avx512f,avx512vl")]
52462    const fn test_mm_mask_cmple_epi32_mask() {
52463        let a = _mm_set_epi32(0, 1, -1, 200);
52464        let b = _mm_set1_epi32(-1);
52465        let mask = 0b11111111;
52466        let r = _mm_mask_cmple_epi32_mask(mask, a, b);
52467        assert_eq!(r, 0b00000010)
52468    }
52469
52470    #[simd_test(enable = "avx512f")]
52471    const fn test_mm512_cmpge_epi32_mask() {
52472        #[rustfmt::skip]
52473        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52474                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52475        let b = _mm512_set1_epi32(-1);
52476        assert_eq!(
52477            _mm512_cmpge_epi32_mask(a, b),
52478            !_mm512_cmplt_epi32_mask(a, b)
52479        )
52480    }
52481
52482    #[simd_test(enable = "avx512f")]
52483    const fn test_mm512_mask_cmpge_epi32_mask() {
52484        #[rustfmt::skip]
52485        let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
52486                                 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52487        let b = _mm512_set1_epi32(-1);
52488        let mask = 0b01111010_01111010;
52489        assert_eq!(
52490            _mm512_mask_cmpge_epi32_mask(mask, a, b),
52491            0b01111010_01111010
52492        );
52493    }
52494
52495    #[simd_test(enable = "avx512f,avx512vl")]
52496    const fn test_mm256_cmpge_epi32_mask() {
52497        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52498        let b = _mm256_set1_epi32(-1);
52499        let r = _mm256_cmpge_epi32_mask(a, b);
52500        assert_eq!(r, 0b11111010)
52501    }
52502
52503    #[simd_test(enable = "avx512f,avx512vl")]
52504    const fn test_mm256_mask_cmpge_epi32_mask() {
52505        let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
52506        let b = _mm256_set1_epi32(-1);
52507        let mask = 0b11111111;
52508        let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
52509        assert_eq!(r, 0b11111010)
52510    }
52511
52512    #[simd_test(enable = "avx512f,avx512vl")]
52513    const fn test_mm_cmpge_epi32_mask() {
52514        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
52515        let b = _mm_set1_epi32(-1);
52516        let r = _mm_cmpge_epi32_mask(a, b);
52517        assert_eq!(r, 0b00001111)
52518    }
52519
52520    #[simd_test(enable = "avx512f,avx512vl")]
52521    const fn test_mm_mask_cmpge_epi32_mask() {
52522        let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
52523        let b = _mm_set1_epi32(-1);
52524        let mask = 0b11111111;
52525        let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
52526        assert_eq!(r, 0b00001111)
52527    }
52528
52529    #[simd_test(enable = "avx512f")]
52530    const fn test_mm512_cmpeq_epi32_mask() {
52531        #[rustfmt::skip]
52532        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
52533                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52534        #[rustfmt::skip]
52535        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
52536                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52537        let m = _mm512_cmpeq_epi32_mask(b, a);
52538        assert_eq!(m, 0b11001111_11001111);
52539    }
52540
52541    #[simd_test(enable = "avx512f")]
52542    const fn test_mm512_mask_cmpeq_epi32_mask() {
52543        #[rustfmt::skip]
52544        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
52545                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52546        #[rustfmt::skip]
52547        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
52548                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52549        let mask = 0b01111010_01111010;
52550        let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
52551        assert_eq!(r, 0b01001010_01001010);
52552    }
52553
52554    #[simd_test(enable = "avx512f,avx512vl")]
52555    const fn test_mm256_cmpeq_epi32_mask() {
52556        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52557        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52558        let m = _mm256_cmpeq_epi32_mask(b, a);
52559        assert_eq!(m, 0b11001111);
52560    }
52561
52562    #[simd_test(enable = "avx512f,avx512vl")]
52563    const fn test_mm256_mask_cmpeq_epi32_mask() {
52564        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52565        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52566        let mask = 0b01111010;
52567        let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
52568        assert_eq!(r, 0b01001010);
52569    }
52570
52571    #[simd_test(enable = "avx512f,avx512vl")]
52572    const fn test_mm_cmpeq_epi32_mask() {
52573        let a = _mm_set_epi32(0, 1, -1, 13);
52574        let b = _mm_set_epi32(0, 1, 13, 42);
52575        let m = _mm_cmpeq_epi32_mask(b, a);
52576        assert_eq!(m, 0b00001100);
52577    }
52578
52579    #[simd_test(enable = "avx512f,avx512vl")]
52580    const fn test_mm_mask_cmpeq_epi32_mask() {
52581        let a = _mm_set_epi32(0, 1, -1, 13);
52582        let b = _mm_set_epi32(0, 1, 13, 42);
52583        let mask = 0b11111111;
52584        let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
52585        assert_eq!(r, 0b00001100);
52586    }
52587
52588    #[simd_test(enable = "avx512f")]
52589    const fn test_mm512_cmpneq_epi32_mask() {
52590        #[rustfmt::skip]
52591        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
52592                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52593        #[rustfmt::skip]
52594        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
52595                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52596        let m = _mm512_cmpneq_epi32_mask(b, a);
52597        assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
52598    }
52599
52600    #[simd_test(enable = "avx512f")]
52601    const fn test_mm512_mask_cmpneq_epi32_mask() {
52602        #[rustfmt::skip]
52603        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
52604                                 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
52605        #[rustfmt::skip]
52606        let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
52607                                 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52608        let mask = 0b01111010_01111010;
52609        let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
52610        assert_eq!(r, 0b00110010_00110010)
52611    }
52612
52613    #[simd_test(enable = "avx512f,avx512vl")]
52614    const fn test_mm256_cmpneq_epi32_mask() {
52615        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52616        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52617        let m = _mm256_cmpneq_epi32_mask(b, a);
52618        assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
52619    }
52620
52621    #[simd_test(enable = "avx512f,avx512vl")]
52622    const fn test_mm256_mask_cmpneq_epi32_mask() {
52623        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
52624        let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
52625        let mask = 0b11111111;
52626        let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
52627        assert_eq!(r, 0b00110011)
52628    }
52629
52630    #[simd_test(enable = "avx512f,avx512vl")]
52631    const fn test_mm_cmpneq_epi32_mask() {
52632        let a = _mm_set_epi32(0, 1, -1, 13);
52633        let b = _mm_set_epi32(0, 1, 13, 42);
52634        let r = _mm_cmpneq_epi32_mask(b, a);
52635        assert_eq!(r, 0b00000011)
52636    }
52637
52638    #[simd_test(enable = "avx512f,avx512vl")]
52639    const fn test_mm_mask_cmpneq_epi32_mask() {
52640        let a = _mm_set_epi32(0, 1, -1, 13);
52641        let b = _mm_set_epi32(0, 1, 13, 42);
52642        let mask = 0b11111111;
52643        let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
52644        assert_eq!(r, 0b00000011)
52645    }
52646
52647    #[simd_test(enable = "avx512f")]
52648    const fn test_mm512_cmp_epi32_mask() {
52649        #[rustfmt::skip]
52650        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
52651                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52652        let b = _mm512_set1_epi32(-1);
52653        let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
52654        assert_eq!(m, 0b00000101_00000101);
52655    }
52656
52657    #[simd_test(enable = "avx512f")]
52658    const fn test_mm512_mask_cmp_epi32_mask() {
52659        #[rustfmt::skip]
52660        let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
52661                                 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52662        let b = _mm512_set1_epi32(-1);
52663        let mask = 0b01100110_01100110;
52664        let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
52665        assert_eq!(r, 0b00000100_00000100);
52666    }
52667
52668    #[simd_test(enable = "avx512f,avx512vl")]
52669    const fn test_mm256_cmp_epi32_mask() {
52670        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52671        let b = _mm256_set1_epi32(-1);
52672        let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
52673        assert_eq!(m, 0b00000101);
52674    }
52675
52676    #[simd_test(enable = "avx512f,avx512vl")]
52677    const fn test_mm256_mask_cmp_epi32_mask() {
52678        let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
52679        let b = _mm256_set1_epi32(-1);
52680        let mask = 0b01100110;
52681        let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
52682        assert_eq!(r, 0b00000100);
52683    }
52684
52685    #[simd_test(enable = "avx512f,avx512vl")]
52686    const fn test_mm_cmp_epi32_mask() {
52687        let a = _mm_set_epi32(0, 1, -1, 13);
52688        let b = _mm_set1_epi32(1);
52689        let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
52690        assert_eq!(m, 0b00001010);
52691    }
52692
52693    #[simd_test(enable = "avx512f,avx512vl")]
52694    const fn test_mm_mask_cmp_epi32_mask() {
52695        let a = _mm_set_epi32(0, 1, -1, 13);
52696        let b = _mm_set1_epi32(1);
52697        let mask = 0b11111111;
52698        let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
52699        assert_eq!(r, 0b00001010);
52700    }
52701
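    // The `set`/`setr` tests below are mirror images of each other:
    // `_mm512_set_*` takes the highest lane first and `_mm512_setr_*` takes
    // lanes in memory order, so reversing the argument list must yield an
    // equal vector.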
52702    #[simd_test(enable = "avx512f")]
52703    const fn test_mm512_set_epi8() {
52704        let r = _mm512_set1_epi8(2);
52705        assert_eq_m512i(
52706            r,
52707            _mm512_set_epi8(
52708                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52709                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52710                2, 2, 2, 2, 2, 2, 2, 2,
52711            ),
52712        )
52713    }
52714
52715    #[simd_test(enable = "avx512f")]
52716    const fn test_mm512_set_epi16() {
52717        let r = _mm512_set1_epi16(2);
52718        assert_eq_m512i(
52719            r,
52720            _mm512_set_epi16(
52721                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52722                2, 2, 2, 2,
52723            ),
52724        )
52725    }
52726
52727    #[simd_test(enable = "avx512f")]
52728    const fn test_mm512_set_epi32() {
52729        let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
52730        assert_eq_m512i(
52731            r,
52732            _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
52733        )
52734    }
52735
52736    #[simd_test(enable = "avx512f")]
52737    const fn test_mm512_setr_epi32() {
52738        let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
52739        assert_eq_m512i(
52740            r,
52741            _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
52742        )
52743    }
52744
52745    #[simd_test(enable = "avx512f")]
52746    const fn test_mm512_set1_epi8() {
52747        let r = _mm512_set_epi8(
52748            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52749            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52750            2, 2, 2, 2, 2, 2,
52751        );
52752        assert_eq_m512i(r, _mm512_set1_epi8(2));
52753    }
52754
52755    #[simd_test(enable = "avx512f")]
52756    const fn test_mm512_set1_epi16() {
52757        let r = _mm512_set_epi16(
52758            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
52759            2, 2, 2,
52760        );
52761        assert_eq_m512i(r, _mm512_set1_epi16(2));
52762    }
52763
52764    #[simd_test(enable = "avx512f")]
52765    const fn test_mm512_set1_epi32() {
52766        let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52767        assert_eq_m512i(r, _mm512_set1_epi32(2));
52768    }
52769
52770    #[simd_test(enable = "avx512f")]
52771    const fn test_mm512_setzero_si512() {
52772        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
52773    }
52774
52775    #[simd_test(enable = "avx512f")]
52776    const fn test_mm512_setzero_epi32() {
52777        assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
52778    }
52779
52780    #[simd_test(enable = "avx512f")]
52781    const fn test_mm512_set_ps() {
52782        let r = _mm512_setr_ps(
52783            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
52784        );
52785        assert_eq_m512(
52786            r,
52787            _mm512_set_ps(
52788                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
52789            ),
52790        )
52791    }
52792
52793    #[simd_test(enable = "avx512f")]
52794    const fn test_mm512_setr_ps() {
52795        let r = _mm512_set_ps(
52796            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
52797        );
52798        assert_eq_m512(
52799            r,
52800            _mm512_setr_ps(
52801                15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
52802            ),
52803        )
52804    }
52805
52806    #[simd_test(enable = "avx512f")]
52807    const fn test_mm512_set1_ps() {
52808        #[rustfmt::skip]
52809        let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
52810                                     2., 2., 2., 2., 2., 2., 2., 2.);
52811        assert_eq_m512(expected, _mm512_set1_ps(2.));
52812    }
52813
52814    #[simd_test(enable = "avx512f")]
52815    const fn test_mm512_set4_epi32() {
52816        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
52817        assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
52818    }
52819
52820    #[simd_test(enable = "avx512f")]
52821    const fn test_mm512_set4_ps() {
52822        let r = _mm512_set_ps(
52823            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
52824        );
52825        assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
52826    }
52827
52828    #[simd_test(enable = "avx512f")]
52829    const fn test_mm512_setr4_epi32() {
52830        let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
52831        assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
52832    }
52833
52834    #[simd_test(enable = "avx512f")]
52835    const fn test_mm512_setr4_ps() {
52836        let r = _mm512_set_ps(
52837            4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
52838        );
52839        assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
52840    }
52841
52842    #[simd_test(enable = "avx512f")]
52843    const fn test_mm512_setzero_ps() {
52844        assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
52845    }
52846
52847    #[simd_test(enable = "avx512f")]
52848    const fn test_mm512_setzero() {
52849        assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
52850    }
52851
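    // The load/store tests below share one pattern: `loadu`/`storeu` have no
    // alignment requirement, while the aligned `load`/`store` forms require
    // the full vector alignment (64 bytes for 512-bit vectors), hence the
    // `#[repr(align(64))]` wrapper structs. `black_box` keeps the pointer
    // opaque so the memory access is not folded away.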
52852    #[simd_test(enable = "avx512f")]
52853    const unsafe fn test_mm512_loadu_pd() {
52854        let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
52855        let p = a.as_ptr();
52856        let r = _mm512_loadu_pd(black_box(p));
52857        let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
52858        assert_eq_m512d(r, e);
52859    }
52860
52861    #[simd_test(enable = "avx512f")]
52862    const unsafe fn test_mm512_storeu_pd() {
52863        let a = _mm512_set1_pd(9.);
52864        let mut r = _mm512_undefined_pd();
52865        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
52866        assert_eq_m512d(r, a);
52867    }
52868
52869    #[simd_test(enable = "avx512f")]
52870    const unsafe fn test_mm512_loadu_ps() {
52871        let a = &[
52872            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
52873        ];
52874        let p = a.as_ptr();
52875        let r = _mm512_loadu_ps(black_box(p));
52876        let e = _mm512_setr_ps(
52877            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
52878        );
52879        assert_eq_m512(r, e);
52880    }
52881
52882    #[simd_test(enable = "avx512f")]
52883    const unsafe fn test_mm512_storeu_ps() {
52884        let a = _mm512_set1_ps(9.);
52885        let mut r = _mm512_undefined_ps();
52886        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
52887        assert_eq_m512(r, a);
52888    }
52889
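    // For the masked variants: `mask_load*` takes unselected lanes from
    // `src` (writemask), `maskz_load*` zeroes them (zeromask), and masked
    // stores leave unselected destination elements untouched.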
52890    #[simd_test(enable = "avx512f")]
52891    const unsafe fn test_mm512_mask_loadu_epi32() {
52892        let src = _mm512_set1_epi32(42);
52893        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
52894        let p = a.as_ptr();
52895        let m = 0b11101000_11001010;
52896        let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
52897        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
52898        assert_eq_m512i(r, e);
52899    }
52900
52901    #[simd_test(enable = "avx512f")]
52902    const unsafe fn test_mm512_maskz_loadu_epi32() {
52903        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
52904        let p = a.as_ptr();
52905        let m = 0b11101000_11001010;
52906        let r = _mm512_maskz_loadu_epi32(m, black_box(p));
52907        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
52908        assert_eq_m512i(r, e);
52909    }
52910
52911    #[simd_test(enable = "avx512f")]
52912    const unsafe fn test_mm512_mask_load_epi32() {
52913        #[repr(align(64))]
52914        struct Align {
52915            data: [i32; 16], // 64 bytes
52916        }
52917        let src = _mm512_set1_epi32(42);
52918        let a = Align {
52919            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
52920        };
52921        let p = a.data.as_ptr();
52922        let m = 0b11101000_11001010;
52923        let r = _mm512_mask_load_epi32(src, m, black_box(p));
52924        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
52925        assert_eq_m512i(r, e);
52926    }
52927
52928    #[simd_test(enable = "avx512f")]
52929    const unsafe fn test_mm512_maskz_load_epi32() {
52930        #[repr(align(64))]
52931        struct Align {
52932            data: [i32; 16], // 64 bytes
52933        }
52934        let a = Align {
52935            data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
52936        };
52937        let p = a.data.as_ptr();
52938        let m = 0b11101000_11001010;
52939        let r = _mm512_maskz_load_epi32(m, black_box(p));
52940        let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
52941        assert_eq_m512i(r, e);
52942    }
52943
52944    #[simd_test(enable = "avx512f")]
52945    const unsafe fn test_mm512_mask_storeu_epi32() {
52946        let mut r = [42_i32; 16];
52947        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52948        let m = 0b11101000_11001010;
52949        _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
52950        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
52951        assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
52952    }
52953
52954    #[simd_test(enable = "avx512f")]
52955    const unsafe fn test_mm512_mask_store_epi32() {
52956        #[repr(align(64))]
52957        struct Align {
52958            data: [i32; 16],
52959        }
52960        let mut r = Align { data: [42; 16] };
52961        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
52962        let m = 0b11101000_11001010;
52963        _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
52964        let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
52965        assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
52966    }
52967
52968    #[simd_test(enable = "avx512f")]
52969    const unsafe fn test_mm512_mask_loadu_epi64() {
52970        let src = _mm512_set1_epi64(42);
52971        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
52972        let p = a.as_ptr();
52973        let m = 0b11001010;
52974        let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
52975        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
52976        assert_eq_m512i(r, e);
52977    }
52978
52979    #[simd_test(enable = "avx512f")]
52980    const unsafe fn test_mm512_maskz_loadu_epi64() {
52981        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
52982        let p = a.as_ptr();
52983        let m = 0b11001010;
52984        let r = _mm512_maskz_loadu_epi64(m, black_box(p));
52985        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
52986        assert_eq_m512i(r, e);
52987    }
52988
52989    #[simd_test(enable = "avx512f")]
52990    const unsafe fn test_mm512_mask_load_epi64() {
52991        #[repr(align(64))]
52992        struct Align {
52993            data: [i64; 8], // 64 bytes
52994        }
52995        let src = _mm512_set1_epi64(42);
52996        let a = Align {
52997            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
52998        };
52999        let p = a.data.as_ptr();
53000        let m = 0b11001010;
53001        let r = _mm512_mask_load_epi64(src, m, black_box(p));
53002        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
53003        assert_eq_m512i(r, e);
53004    }
53005
53006    #[simd_test(enable = "avx512f")]
53007    const unsafe fn test_mm512_maskz_load_epi64() {
53008        #[repr(align(64))]
53009        struct Align {
53010            data: [i64; 8], // 64 bytes
53011        }
53012        let a = Align {
53013            data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
53014        };
53015        let p = a.data.as_ptr();
53016        let m = 0b11001010;
53017        let r = _mm512_maskz_load_epi64(m, black_box(p));
53018        let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
53019        assert_eq_m512i(r, e);
53020    }
53021
53022    #[simd_test(enable = "avx512f")]
53023    const unsafe fn test_mm512_mask_storeu_epi64() {
53024        let mut r = [42_i64; 8];
53025        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
53026        let m = 0b11001010;
53027        _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
53028        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
53029        assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
53030    }
53031
53032    #[simd_test(enable = "avx512f")]
53033    const unsafe fn test_mm512_mask_store_epi64() {
53034        #[repr(align(64))]
53035        struct Align {
53036            data: [i64; 8],
53037        }
53038        let mut r = Align { data: [42; 8] };
53039        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
53040        let m = 0b11001010;
53041        let p = r.data.as_mut_ptr();
53042        _mm512_mask_store_epi64(p, m, a);
53043        let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
53044        assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
53045    }
53046
53047    #[simd_test(enable = "avx512f")]
53048    const unsafe fn test_mm512_mask_loadu_ps() {
53049        let src = _mm512_set1_ps(42.0);
53050        let a = &[
53051            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
53052            16.0,
53053        ];
53054        let p = a.as_ptr();
53055        let m = 0b11101000_11001010;
53056        let r = _mm512_mask_loadu_ps(src, m, black_box(p));
53057        let e = _mm512_setr_ps(
53058            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
53059            16.0,
53060        );
53061        assert_eq_m512(r, e);
53062    }
53063
53064    #[simd_test(enable = "avx512f")]
53065    const unsafe fn test_mm512_maskz_loadu_ps() {
53066        let a = &[
53067            1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
53068            16.0,
53069        ];
53070        let p = a.as_ptr();
53071        let m = 0b11101000_11001010;
53072        let r = _mm512_maskz_loadu_ps(m, black_box(p));
53073        let e = _mm512_setr_ps(
53074            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
53075        );
53076        assert_eq_m512(r, e);
53077    }
53078
53079    #[simd_test(enable = "avx512f")]
53080    const unsafe fn test_mm512_mask_load_ps() {
53081        #[repr(align(64))]
53082        struct Align {
53083            data: [f32; 16], // 64 bytes
53084        }
53085        let src = _mm512_set1_ps(42.0);
53086        let a = Align {
53087            data: [
53088                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
53089                15.0, 16.0,
53090            ],
53091        };
53092        let p = a.data.as_ptr();
53093        let m = 0b11101000_11001010;
53094        let r = _mm512_mask_load_ps(src, m, black_box(p));
53095        let e = _mm512_setr_ps(
53096            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
53097            16.0,
53098        );
53099        assert_eq_m512(r, e);
53100    }
53101
53102    #[simd_test(enable = "avx512f")]
53103    const unsafe fn test_mm512_maskz_load_ps() {
53104        #[repr(align(64))]
53105        struct Align {
53106            data: [f32; 16], // 64 bytes
53107        }
53108        let a = Align {
53109            data: [
53110                1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
53111                15.0, 16.0,
53112            ],
53113        };
53114        let p = a.data.as_ptr();
53115        let m = 0b11101000_11001010;
53116        let r = _mm512_maskz_load_ps(m, black_box(p));
53117        let e = _mm512_setr_ps(
53118            0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
53119        );
53120        assert_eq_m512(r, e);
53121    }
53122
53123    #[simd_test(enable = "avx512f")]
53124    const unsafe fn test_mm512_mask_storeu_ps() {
53125        let mut r = [42_f32; 16];
53126        let a = _mm512_setr_ps(
53127            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
53128        );
53129        let m = 0b11101000_11001010;
53130        _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
53131        let e = _mm512_setr_ps(
53132            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
53133            16.0,
53134        );
53135        assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
53136    }
53137
53138    #[simd_test(enable = "avx512f")]
53139    const unsafe fn test_mm512_mask_store_ps() {
53140        #[repr(align(64))]
53141        struct Align {
53142            data: [f32; 16],
53143        }
53144        let mut r = Align { data: [42.0; 16] };
53145        let a = _mm512_setr_ps(
53146            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
53147        );
53148        let m = 0b11101000_11001010;
53149        _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
53150        let e = _mm512_setr_ps(
53151            42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
53152            16.0,
53153        );
53154        assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
53155    }
53156
53157    #[simd_test(enable = "avx512f")]
53158    const unsafe fn test_mm512_mask_loadu_pd() {
53159        let src = _mm512_set1_pd(42.0);
53160        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
53161        let p = a.as_ptr();
53162        let m = 0b11001010;
53163        let r = _mm512_mask_loadu_pd(src, m, black_box(p));
53164        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53165        assert_eq_m512d(r, e);
53166    }
53167
53168    #[simd_test(enable = "avx512f")]
53169    const unsafe fn test_mm512_maskz_loadu_pd() {
53170        let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
53171        let p = a.as_ptr();
53172        let m = 0b11001010;
53173        let r = _mm512_maskz_loadu_pd(m, black_box(p));
53174        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
53175        assert_eq_m512d(r, e);
53176    }
53177
53178    #[simd_test(enable = "avx512f")]
53179    const unsafe fn test_mm512_mask_load_pd() {
53180        #[repr(align(64))]
53181        struct Align {
53182            data: [f64; 8], // 64 bytes
53183        }
53184        let src = _mm512_set1_pd(42.0);
53185        let a = Align {
53186            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
53187        };
53188        let p = a.data.as_ptr();
53189        let m = 0b11001010;
53190        let r = _mm512_mask_load_pd(src, m, black_box(p));
53191        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53192        assert_eq_m512d(r, e);
53193    }
53194
53195    #[simd_test(enable = "avx512f")]
53196    const unsafe fn test_mm512_maskz_load_pd() {
53197        #[repr(align(64))]
53198        struct Align {
53199            data: [f64; 8], // 64 bytes
53200        }
53201        let a = Align {
53202            data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
53203        };
53204        let p = a.data.as_ptr();
53205        let m = 0b11001010;
53206        let r = _mm512_maskz_load_pd(m, black_box(p));
53207        let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
53208        assert_eq_m512d(r, e);
53209    }
53210
53211    #[simd_test(enable = "avx512f")]
53212    const unsafe fn test_mm512_mask_storeu_pd() {
53213        let mut r = [42_f64; 8];
53214        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
53215        let m = 0b11001010;
53216        _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
53217        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53218        assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
53219    }
53220
53221    #[simd_test(enable = "avx512f")]
53222    const unsafe fn test_mm512_mask_store_pd() {
53223        #[repr(align(64))]
53224        struct Align {
53225            data: [f64; 8],
53226        }
53227        let mut r = Align { data: [42.0; 8] };
53228        let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
53229        let m = 0b11001010;
53230        _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
53231        let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53232        assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
53233    }
53234
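    // Same pattern for the 256-bit variants: the aligned forms require
    // 32-byte alignment, so the wrappers use `#[repr(align(32))]`.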
53235    #[simd_test(enable = "avx512f,avx512vl")]
53236    const unsafe fn test_mm256_mask_loadu_epi32() {
53237        let src = _mm256_set1_epi32(42);
53238        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
53239        let p = a.as_ptr();
53240        let m = 0b11001010;
53241        let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
53242        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53243        assert_eq_m256i(r, e);
53244    }
53245
53246    #[simd_test(enable = "avx512f,avx512vl")]
53247    const unsafe fn test_mm256_maskz_loadu_epi32() {
53248        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
53249        let p = a.as_ptr();
53250        let m = 0b11001010;
53251        let r = _mm256_maskz_loadu_epi32(m, black_box(p));
53252        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
53253        assert_eq_m256i(r, e);
53254    }
53255
53256    #[simd_test(enable = "avx512f,avx512vl")]
53257    const unsafe fn test_mm256_mask_load_epi32() {
53258        #[repr(align(32))]
53259        struct Align {
53260            data: [i32; 8], // 32 bytes
53261        }
53262        let src = _mm256_set1_epi32(42);
53263        let a = Align {
53264            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
53265        };
53266        let p = a.data.as_ptr();
53267        let m = 0b11001010;
53268        let r = _mm256_mask_load_epi32(src, m, black_box(p));
53269        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53270        assert_eq_m256i(r, e);
53271    }
53272
53273    #[simd_test(enable = "avx512f,avx512vl")]
53274    const unsafe fn test_mm256_maskz_load_epi32() {
53275        #[repr(align(32))]
53276        struct Align {
53277            data: [i32; 8], // 32 bytes
53278        }
53279        let a = Align {
53280            data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
53281        };
53282        let p = a.data.as_ptr();
53283        let m = 0b11001010;
53284        let r = _mm256_maskz_load_epi32(m, black_box(p));
53285        let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
53286        assert_eq_m256i(r, e);
53287    }
53288
53289    #[simd_test(enable = "avx512f,avx512vl")]
53290    const unsafe fn test_mm256_mask_storeu_epi32() {
53291        let mut r = [42_i32; 8];
53292        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53293        let m = 0b11001010;
53294        _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
53295        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53296        assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
53297    }
53298
53299    #[simd_test(enable = "avx512f,avx512vl")]
53300    const unsafe fn test_mm256_mask_store_epi32() {
53301        #[repr(align(32))]
53302        struct Align {
53303            data: [i32; 8],
53304        }
53305        let mut r = Align { data: [42; 8] };
53306        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
53307        let m = 0b11001010;
53308        _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
53309        let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
53310        assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
53311    }
53312
53313    #[simd_test(enable = "avx512f,avx512vl")]
53314    const unsafe fn test_mm256_mask_loadu_epi64() {
53315        let src = _mm256_set1_epi64x(42);
53316        let a = &[1_i64, 2, 3, 4];
53317        let p = a.as_ptr();
53318        let m = 0b1010;
53319        let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
53320        let e = _mm256_setr_epi64x(42, 2, 42, 4);
53321        assert_eq_m256i(r, e);
53322    }
53323
53324    #[simd_test(enable = "avx512f,avx512vl")]
53325    const unsafe fn test_mm256_maskz_loadu_epi64() {
53326        let a = &[1_i64, 2, 3, 4];
53327        let p = a.as_ptr();
53328        let m = 0b1010;
53329        let r = _mm256_maskz_loadu_epi64(m, black_box(p));
53330        let e = _mm256_setr_epi64x(0, 2, 0, 4);
53331        assert_eq_m256i(r, e);
53332    }
53333
53334    #[simd_test(enable = "avx512f,avx512vl")]
53335    const unsafe fn test_mm256_mask_load_epi64() {
53336        #[repr(align(32))]
53337        struct Align {
53338            data: [i64; 4], // 32 bytes
53339        }
53340        let src = _mm256_set1_epi64x(42);
53341        let a = Align {
53342            data: [1_i64, 2, 3, 4],
53343        };
53344        let p = a.data.as_ptr();
53345        let m = 0b1010;
53346        let r = _mm256_mask_load_epi64(src, m, black_box(p));
53347        let e = _mm256_setr_epi64x(42, 2, 42, 4);
53348        assert_eq_m256i(r, e);
53349    }
53350
53351    #[simd_test(enable = "avx512f,avx512vl")]
53352    const unsafe fn test_mm256_maskz_load_epi64() {
53353        #[repr(align(32))]
53354        struct Align {
53355            data: [i64; 4], // 32 bytes
53356        }
53357        let a = Align {
53358            data: [1_i64, 2, 3, 4],
53359        };
53360        let p = a.data.as_ptr();
53361        let m = 0b1010;
53362        let r = _mm256_maskz_load_epi64(m, black_box(p));
53363        let e = _mm256_setr_epi64x(0, 2, 0, 4);
53364        assert_eq_m256i(r, e);
53365    }
53366
53367    #[simd_test(enable = "avx512f,avx512vl")]
53368    const unsafe fn test_mm256_mask_storeu_epi64() {
53369        let mut r = [42_i64; 4];
53370        let a = _mm256_setr_epi64x(1, 2, 3, 4);
53371        let m = 0b1010;
53372        _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
53373        let e = _mm256_setr_epi64x(42, 2, 42, 4);
53374        assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
53375    }
53376
53377    #[simd_test(enable = "avx512f,avx512vl")]
53378    const unsafe fn test_mm256_mask_store_epi64() {
53379        #[repr(align(32))]
53380        struct Align {
53381            data: [i64; 4],
53382        }
53383        let mut r = Align { data: [42; 4] };
53384        let a = _mm256_setr_epi64x(1, 2, 3, 4);
53385        let m = 0b1010;
53386        _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
53387        let e = _mm256_setr_epi64x(42, 2, 42, 4);
53388        assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
53389    }
53390
53391    #[simd_test(enable = "avx512f,avx512vl")]
53392    const unsafe fn test_mm256_mask_loadu_ps() {
53393        let src = _mm256_set1_ps(42.0);
53394        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
53395        let p = a.as_ptr();
53396        let m = 0b11001010;
53397        let r = _mm256_mask_loadu_ps(src, m, black_box(p));
53398        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53399        assert_eq_m256(r, e);
53400    }
53401
53402    #[simd_test(enable = "avx512f,avx512vl")]
53403    const unsafe fn test_mm256_maskz_loadu_ps() {
53404        let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
53405        let p = a.as_ptr();
53406        let m = 0b11001010;
53407        let r = _mm256_maskz_loadu_ps(m, black_box(p));
53408        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
53409        assert_eq_m256(r, e);
53410    }
53411
53412    #[simd_test(enable = "avx512f,avx512vl")]
53413    const unsafe fn test_mm256_mask_load_ps() {
53414        #[repr(align(32))]
53415        struct Align {
53416            data: [f32; 8], // 32 bytes
53417        }
53418        let src = _mm256_set1_ps(42.0);
53419        let a = Align {
53420            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
53421        };
53422        let p = a.data.as_ptr();
53423        let m = 0b11001010;
53424        let r = _mm256_mask_load_ps(src, m, black_box(p));
53425        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53426        assert_eq_m256(r, e);
53427    }
53428
53429    #[simd_test(enable = "avx512f,avx512vl")]
53430    const unsafe fn test_mm256_maskz_load_ps() {
53431        #[repr(align(32))]
53432        struct Align {
53433            data: [f32; 8], // 32 bytes
53434        }
53435        let a = Align {
53436            data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
53437        };
53438        let p = a.data.as_ptr();
53439        let m = 0b11001010;
53440        let r = _mm256_maskz_load_ps(m, black_box(p));
53441        let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
53442        assert_eq_m256(r, e);
53443    }
53444
53445    #[simd_test(enable = "avx512f,avx512vl")]
53446    const unsafe fn test_mm256_mask_storeu_ps() {
53447        let mut r = [42_f32; 8];
53448        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
53449        let m = 0b11001010;
53450        _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
53451        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53452        assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
53453    }
53454
53455    #[simd_test(enable = "avx512f,avx512vl")]
53456    const unsafe fn test_mm256_mask_store_ps() {
53457        #[repr(align(32))]
53458        struct Align {
53459            data: [f32; 8],
53460        }
53461        let mut r = Align { data: [42.0; 8] };
53462        let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
53463        let m = 0b11001010;
53464        _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
53465        let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
53466        assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
53467    }
53468
53469    #[simd_test(enable = "avx512f,avx512vl")]
53470    const unsafe fn test_mm256_mask_loadu_pd() {
53471        let src = _mm256_set1_pd(42.0);
53472        let a = &[1.0_f64, 2.0, 3.0, 4.0];
53473        let p = a.as_ptr();
53474        let m = 0b1010;
53475        let r = _mm256_mask_loadu_pd(src, m, black_box(p));
53476        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
53477        assert_eq_m256d(r, e);
53478    }
53479
53480    #[simd_test(enable = "avx512f,avx512vl")]
53481    const unsafe fn test_mm256_maskz_loadu_pd() {
53482        let a = &[1.0_f64, 2.0, 3.0, 4.0];
53483        let p = a.as_ptr();
53484        let m = 0b1010;
53485        let r = _mm256_maskz_loadu_pd(m, black_box(p));
53486        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
53487        assert_eq_m256d(r, e);
53488    }
53489
53490    #[simd_test(enable = "avx512f,avx512vl")]
53491    const unsafe fn test_mm256_mask_load_pd() {
53492        #[repr(align(32))]
53493        struct Align {
53494            data: [f64; 4], // 32 bytes
53495        }
53496        let src = _mm256_set1_pd(42.0);
53497        let a = Align {
53498            data: [1.0_f64, 2.0, 3.0, 4.0],
53499        };
53500        let p = a.data.as_ptr();
53501        let m = 0b1010;
53502        let r = _mm256_mask_load_pd(src, m, black_box(p));
53503        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
53504        assert_eq_m256d(r, e);
53505    }
53506
53507    #[simd_test(enable = "avx512f,avx512vl")]
53508    const unsafe fn test_mm256_maskz_load_pd() {
53509        #[repr(align(32))]
53510        struct Align {
53511            data: [f64; 4], // 32 bytes
53512        }
53513        let a = Align {
53514            data: [1.0_f64, 2.0, 3.0, 4.0],
53515        };
53516        let p = a.data.as_ptr();
53517        let m = 0b1010;
53518        let r = _mm256_maskz_load_pd(m, black_box(p));
53519        let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
53520        assert_eq_m256d(r, e);
53521    }
53522
53523    #[simd_test(enable = "avx512f,avx512vl")]
53524    const unsafe fn test_mm256_mask_storeu_pd() {
53525        let mut r = [42_f64; 4];
53526        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
53527        let m = 0b1010;
53528        _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
53529        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
53530        assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
53531    }
53532
53533    #[simd_test(enable = "avx512f,avx512vl")]
53534    const unsafe fn test_mm256_mask_store_pd() {
53535        #[repr(align(32))]
53536        struct Align {
53537            data: [f64; 4],
53538        }
53539        let mut r = Align { data: [42.0; 4] };
53540        let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
53541        let m = 0b1010;
53542        _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
53543        let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
53544        assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
53545    }
53546
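    // And for the 128-bit variants the aligned forms require 16-byte
    // alignment (`#[repr(align(16))]`).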
53547    #[simd_test(enable = "avx512f,avx512vl")]
53548    const unsafe fn test_mm_mask_loadu_epi32() {
53549        let src = _mm_set1_epi32(42);
53550        let a = &[1_i32, 2, 3, 4];
53551        let p = a.as_ptr();
53552        let m = 0b1010;
53553        let r = _mm_mask_loadu_epi32(src, m, black_box(p));
53554        let e = _mm_setr_epi32(42, 2, 42, 4);
53555        assert_eq_m128i(r, e);
53556    }
53557
53558    #[simd_test(enable = "avx512f,avx512vl")]
53559    const unsafe fn test_mm_maskz_loadu_epi32() {
53560        let a = &[1_i32, 2, 3, 4];
53561        let p = a.as_ptr();
53562        let m = 0b1010;
53563        let r = _mm_maskz_loadu_epi32(m, black_box(p));
53564        let e = _mm_setr_epi32(0, 2, 0, 4);
53565        assert_eq_m128i(r, e);
53566    }
53567
53568    #[simd_test(enable = "avx512f,avx512vl")]
53569    const unsafe fn test_mm_mask_load_epi32() {
53570        #[repr(align(16))]
53571        struct Align {
53572            data: [i32; 4], // 16 bytes
        }
        let src = _mm_set1_epi32(42);
        let a = Align {
            data: [1_i32, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm_mask_load_epi32(src, m, black_box(p));
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_maskz_load_epi32() {
        #[repr(align(16))]
        struct Align {
            data: [i32; 4], // 16 bytes
        }
        let a = Align {
            data: [1_i32, 2, 3, 4],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm_maskz_load_epi32(m, black_box(p));
        let e = _mm_setr_epi32(0, 2, 0, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_storeu_epi32() {
        let mut r = [42_i32; 4];
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let m = 0b1010;
        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_store_epi32() {
        #[repr(align(16))]
        struct Align {
            data: [i32; 4], // 16 bytes
        }
        let mut r = Align { data: [42; 4] };
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let m = 0b1010;
        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
        let e = _mm_setr_epi32(42, 2, 42, 4);
        assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_loadu_epi64() {
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b10;
        let r = _mm_mask_loadu_epi64(src, m, black_box(p));
        let e = _mm_setr_epi64x(42, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_maskz_loadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b10;
        let r = _mm_maskz_loadu_epi64(m, black_box(p));
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_load_epi64() {
        #[repr(align(16))]
        struct Align {
            data: [i64; 2], // 16 bytes
        }
        let src = _mm_set1_epi64x(42);
        let a = Align { data: [1_i64, 2] };
        let p = a.data.as_ptr();
        let m = 0b10;
        let r = _mm_mask_load_epi64(src, m, black_box(p));
        let e = _mm_setr_epi64x(42, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_maskz_load_epi64() {
        #[repr(align(16))]
        struct Align {
            data: [i64; 2], // 16 bytes
        }
        let a = Align { data: [1_i64, 2] };
        let p = a.data.as_ptr();
        let m = 0b10;
        let r = _mm_maskz_load_epi64(m, black_box(p));
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_storeu_epi64() {
        let mut r = [42_i64; 2];
        let a = _mm_setr_epi64x(1, 2);
        let m = 0b10;
        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
        let e = _mm_setr_epi64x(42, 2);
        assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_store_epi64() {
        #[repr(align(16))]
        struct Align {
            data: [i64; 2], // 16 bytes
        }
        let mut r = Align { data: [42; 2] };
        let a = _mm_setr_epi64x(1, 2);
        let m = 0b10;
        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
        let e = _mm_setr_epi64x(42, 2);
        assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_loadu_ps() {
        let src = _mm_set1_ps(42.0);
        let a = &[1.0_f32, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm_mask_loadu_ps(src, m, black_box(p));
        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_maskz_loadu_ps() {
        let a = &[1.0_f32, 2.0, 3.0, 4.0];
        let p = a.as_ptr();
        let m = 0b1010;
        let r = _mm_maskz_loadu_ps(m, black_box(p));
        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_load_ps() {
        #[repr(align(16))]
        struct Align {
            data: [f32; 4], // 16 bytes
        }
        let src = _mm_set1_ps(42.0);
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm_mask_load_ps(src, m, black_box(p));
        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_maskz_load_ps() {
        #[repr(align(16))]
        struct Align {
            data: [f32; 4], // 16 bytes
        }
        let a = Align {
            data: [1.0_f32, 2.0, 3.0, 4.0],
        };
        let p = a.data.as_ptr();
        let m = 0b1010;
        let r = _mm_maskz_load_ps(m, black_box(p));
        let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_storeu_ps() {
        let mut r = [42_f32; 4];
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
        assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_store_ps() {
        #[repr(align(16))]
        struct Align {
            data: [f32; 4], // 16 bytes
        }
        let mut r = Align { data: [42.0; 4] };
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let m = 0b1010;
        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
        let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
        assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_loadu_pd() {
        let src = _mm_set1_pd(42.0);
        let a = &[1.0_f64, 2.0];
        let p = a.as_ptr();
        let m = 0b10;
        let r = _mm_mask_loadu_pd(src, m, black_box(p));
        let e = _mm_setr_pd(42.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_maskz_loadu_pd() {
        let a = &[1.0_f64, 2.0];
        let p = a.as_ptr();
        let m = 0b10;
        let r = _mm_maskz_loadu_pd(m, black_box(p));
        let e = _mm_setr_pd(0.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_load_pd() {
        #[repr(align(16))]
        struct Align {
            data: [f64; 2], // 16 bytes
        }
        let src = _mm_set1_pd(42.0);
        let a = Align {
            data: [1.0_f64, 2.0],
        };
        let p = a.data.as_ptr();
        let m = 0b10;
        let r = _mm_mask_load_pd(src, m, black_box(p));
        let e = _mm_setr_pd(42.0, 2.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_maskz_load_pd() {
        #[repr(align(16))]
        struct Align {
            data: [f64; 2], // 16 bytes
        }
        let a = Align {
            data: [1.0_f64, 2.0],
        };
        let p = a.data.as_ptr();
        let m = 0b10;
        let r = _mm_maskz_load_pd(m, black_box(p));
        let e = _mm_setr_pd(0.0, 2.0);
        assert_eq_m128d(r, e);
    }

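    // The scalar masked load/store tests below exercise `_mm_mask_load_ss`/`_mm_mask_load_sd`
    // and their store counterparts. Per Intel's documentation, only bit 0 of the mask is
    // consulted: the lower element is merged from `src` (or zeroed for the `maskz` form)
    // when that bit is clear, and the upper elements of the destination are always zeroed.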
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_load_ss() {
        #[repr(align(16))]
        struct Align {
            data: f32,
        }
        let src = _mm_set_ss(2.0);
        let mem = Align { data: 1.0 };
        let r = _mm_mask_load_ss(src, 0b1, &mem.data);
        assert_eq_m128(r, _mm_set_ss(1.0));
        let r = _mm_mask_load_ss(src, 0b0, &mem.data);
        assert_eq_m128(r, _mm_set_ss(2.0));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_load_ss() {
        #[repr(align(16))]
        struct Align {
            data: f32,
        }
        let mem = Align { data: 1.0 };
        let r = _mm_maskz_load_ss(0b1, &mem.data);
        assert_eq_m128(r, _mm_set_ss(1.0));
        let r = _mm_maskz_load_ss(0b0, &mem.data);
        assert_eq_m128(r, _mm_set_ss(0.0));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_load_sd() {
        #[repr(align(16))]
        struct Align {
            data: f64,
        }
        let src = _mm_set_sd(2.0);
        let mem = Align { data: 1.0 };
        let r = _mm_mask_load_sd(src, 0b1, &mem.data);
        assert_eq_m128d(r, _mm_set_sd(1.0));
        let r = _mm_mask_load_sd(src, 0b0, &mem.data);
        assert_eq_m128d(r, _mm_set_sd(2.0));
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_maskz_load_sd() {
        #[repr(align(16))]
        struct Align {
            data: f64,
        }
        let mem = Align { data: 1.0 };
        let r = _mm_maskz_load_sd(0b1, &mem.data);
        assert_eq_m128d(r, _mm_set_sd(1.0));
        let r = _mm_maskz_load_sd(0b0, &mem.data);
        assert_eq_m128d(r, _mm_set_sd(0.0));
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_storeu_pd() {
        let mut r = [42_f64; 2];
        let a = _mm_setr_pd(1.0, 2.0);
        let m = 0b10;
        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
        let e = _mm_setr_pd(42.0, 2.0);
        assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_mask_store_pd() {
        #[repr(align(16))]
        struct Align {
            data: [f64; 2], // 16 bytes
        }
        let mut r = Align { data: [42.0; 2] };
        let a = _mm_setr_pd(1.0, 2.0);
        let m = 0b10;
        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
        let e = _mm_setr_pd(42.0, 2.0);
        assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_store_ss() {
        #[repr(align(16))]
        struct Align {
            data: f32,
        }
        let a = _mm_set_ss(2.0);
        let mut mem = Align { data: 1.0 };
        _mm_mask_store_ss(&mut mem.data, 0b1, a);
        assert_eq!(mem.data, 2.0);
        _mm_mask_store_ss(&mut mem.data, 0b0, a);
        assert_eq!(mem.data, 2.0);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm_mask_store_sd() {
        #[repr(align(16))]
        struct Align {
            data: f64,
        }
        let a = _mm_set_sd(2.0);
        let mut mem = Align { data: 1.0 };
        _mm_mask_store_sd(&mut mem.data, 0b1, a);
        assert_eq!(mem.data, 2.0);
        _mm_mask_store_sd(&mut mem.data, 0b0, a);
        assert_eq!(mem.data, 2.0);
    }

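    // `_mm512_set_pd` takes its arguments from the highest element down to element 0,
    // while `_mm512_setr_pd` takes them in memory (reversed) order, so each of the two
    // tests below checks one constructor against the other with the argument list reversed.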
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_setr_pd() {
        let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
        assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_set_pd() {
        let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
        assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
    }

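    // Rotate tests: unlike a shift, a rotate wraps the bits that fall off one end back
    // around to the other, so rotating `1 << 31` left by one yields `1 << 0`.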
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_rol_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm512_rol_epi32::<1>(a);
        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_rol_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_rol_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        let r = _mm512_maskz_rol_epi32::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_rol_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm256_rol_epi32::<1>(a);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_rol_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_rol_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm256_maskz_rol_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_rol_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let r = _mm_rol_epi32::<1>(a);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_rol_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let r = _mm_mask_rol_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_rol_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let r = _mm_maskz_rol_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_ror_epi32() {
        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let r = _mm512_ror_epi32::<1>(a);
        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_ror_epi32() {
        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_ror_epi32() {
        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
        let r = _mm512_maskz_ror_epi32::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_ror_epi32() {
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        let r = _mm256_ror_epi32::<1>(a);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_ror_epi32() {
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_ror_epi32() {
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        let r = _mm256_maskz_ror_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_ror_epi32() {
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        let r = _mm_ror_epi32::<1>(a);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_ror_epi32() {
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        let r = _mm_mask_ror_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_ror_epi32() {
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        let r = _mm_maskz_ror_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

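    // Shift-by-immediate tests: in contrast to the rotates above, `slli`/`srli` shift in
    // zeros, so the bit shifted out of `1 << 31` by `slli` is simply lost.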
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_slli_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm512_slli_epi32::<1>(a);
        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_slli_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_slli_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        let r = _mm512_maskz_slli_epi32::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_slli_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_slli_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm256_maskz_slli_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_slli_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let r = _mm_mask_slli_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_slli_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let r = _mm_maskz_slli_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srli_epi32() {
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let r = _mm512_srli_epi32::<1>(a);
        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srli_epi32() {
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srli_epi32() {
        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
        let r = _mm512_maskz_srli_epi32::<1>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srli_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srli_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_maskz_srli_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srli_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_mask_srli_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srli_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_maskz_srli_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

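    // The `rolv`/`rorv` variants take a per-element rotate count from a second vector `b`
    // instead of an immediate; the tests use `set1_epi32(1)` so every element rotates by 1.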
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_rolv_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_rolv_epi32(a, b);
        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_rolv_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_mask_rolv_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_rolv_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_rolv_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_rolv_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_rolv_epi32(a, b);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_rolv_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_mask_rolv_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_rolv_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_rolv_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_rolv_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let b = _mm_set1_epi32(1);
        let r = _mm_rolv_epi32(a, b);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_rolv_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let b = _mm_set1_epi32(1);
        let r = _mm_mask_rolv_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_rolv_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_rolv_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(1 << 0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_rorv_epi32() {
        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_rorv_epi32(a, b);
        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_rorv_epi32() {
        let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_mask_rorv_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_rorv_epi32() {
        let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
        let b = _mm512_set1_epi32(1);
        let r = _mm512_maskz_rorv_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_rorv_epi32() {
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_rorv_epi32(a, b);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_rorv_epi32() {
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_mask_rorv_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_rorv_epi32() {
        let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
        let b = _mm256_set1_epi32(1);
        let r = _mm256_maskz_rorv_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_rorv_epi32() {
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        let b = _mm_set1_epi32(1);
        let r = _mm_rorv_epi32(a, b);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_rorv_epi32() {
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        let b = _mm_set1_epi32(1);
        let r = _mm_mask_rorv_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_rorv_epi32() {
        let a = _mm_set_epi32(1 << 0, 2, 2, 2);
        let b = _mm_set1_epi32(1);
        let r = _mm_maskz_rorv_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(1 << 31, 1, 1, 1);
        assert_eq_m128i(r, e);
    }

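    // The `sllv`/`srlv` variants likewise take a per-element shift count from a `count`
    // vector; per Intel's documentation a count greater than 31 zeroes that element.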
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_sllv_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm512_set1_epi32(1);
        let r = _mm512_sllv_epi32(a, count);
        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_sllv_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm512_set1_epi32(1);
        let r = _mm512_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_sllv_epi32() {
        let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm512_maskz_sllv_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_sllv_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_sllv_epi32() {
        let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_sllv_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_sllv_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_sllv_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_sllv_epi32() {
        let a = _mm_set_epi32(1 << 31, 1, 1, 1);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_sllv_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(0, 2, 2, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srlv_epi32() {
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let count = _mm512_set1_epi32(1);
        let r = _mm512_srlv_epi32(a, count);
        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srlv_epi32() {
        let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
        let count = _mm512_set1_epi32(1);
        let r = _mm512_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srlv_epi32() {
        let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
        let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
        let r = _mm512_maskz_srlv_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srlv_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srlv_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_srlv_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srlv_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_srlv_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srlv_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_srlv_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

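    // `sll`/`srl`/`sra` (without the `i`/`v` suffix) shift every element by a single count
    // taken from the low 64 bits of an `__m128i`, which is why `_mm_set_epi32(0, 0, 0, 2)`
    // below means "shift by 2" and the upper elements of `count` are ignored.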
    #[simd_test(enable = "avx512f")]
    fn test_mm512_sll_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_sll_epi32(a, count);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 1 << 2, 1 << 3, 1 << 4,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_sll_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_sll_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 1 << 2, 1 << 3, 1 << 4,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_sll_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 31,
        );
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_sll_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_sll_epi32() {
        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_sll_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_sll_epi32() {
        let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_sll_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_sll_epi32() {
        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_sll_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_sll_epi32() {
        let a = _mm_set_epi32(1 << 13, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_sll_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_sll_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 14, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_srl_epi32(a, count);
        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
        );
        let count = _mm_set_epi32(0, 0, 0, 2);
        let r = _mm512_mask_srl_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_srl_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 31, 1 << 0, 1 << 1, 1 << 2,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 31,
        );
        let count = _mm_set_epi32(2, 0, 0, 2);
        let r = _mm512_maskz_srl_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_srl_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_mask_srl_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_srl_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm256_maskz_srl_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_srl_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_mask_srl_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_srl_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set_epi32(0, 0, 0, 1);
        let r = _mm_maskz_srl_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srl_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

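    // `sra` is an arithmetic right shift: the sign bit is replicated into the vacated
    // positions, so negative values round toward negative infinity (e.g. -15 >> 2 == -4).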
54787    #[simd_test(enable = "avx512f")]
54788    fn test_mm512_sra_epi32() {
54789        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
54790        let count = _mm_set_epi32(1, 0, 0, 2);
54791        let r = _mm512_sra_epi32(a, count);
54792        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
54793        assert_eq_m512i(r, e);
54794    }
54795
54796    #[simd_test(enable = "avx512f")]
54797    fn test_mm512_mask_sra_epi32() {
54798        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
54799        let count = _mm_set_epi32(0, 0, 0, 2);
54800        let r = _mm512_mask_sra_epi32(a, 0, a, count);
54801        assert_eq_m512i(r, a);
54802        let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
54803        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
54804        assert_eq_m512i(r, e);
54805    }
54806
54807    #[simd_test(enable = "avx512f")]
54808    fn test_mm512_maskz_sra_epi32() {
54809        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
54810        let count = _mm_set_epi32(2, 0, 0, 2);
54811        let r = _mm512_maskz_sra_epi32(0, a, count);
54812        assert_eq_m512i(r, _mm512_setzero_si512());
54813        let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
54814        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
54815        assert_eq_m512i(r, e);
54816    }
54817
54818    #[simd_test(enable = "avx512f,avx512vl")]
54819    fn test_mm256_mask_sra_epi32() {
54820        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
54821        let count = _mm_set_epi32(0, 0, 0, 1);
54822        let r = _mm256_mask_sra_epi32(a, 0, a, count);
54823        assert_eq_m256i(r, a);
54824        let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
54825        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
54826        assert_eq_m256i(r, e);
54827    }
54828
54829    #[simd_test(enable = "avx512f,avx512vl")]
54830    fn test_mm256_maskz_sra_epi32() {
54831        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
54832        let count = _mm_set_epi32(0, 0, 0, 1);
54833        let r = _mm256_maskz_sra_epi32(0, a, count);
54834        assert_eq_m256i(r, _mm256_setzero_si256());
54835        let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
54836        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
54837        assert_eq_m256i(r, e);
54838    }
54839
54840    #[simd_test(enable = "avx512f,avx512vl")]
54841    fn test_mm_mask_sra_epi32() {
54842        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
54843        let count = _mm_set_epi32(0, 0, 0, 1);
54844        let r = _mm_mask_sra_epi32(a, 0, a, count);
54845        assert_eq_m128i(r, a);
54846        let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
54847        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
54848        assert_eq_m128i(r, e);
54849    }
54850
54851    #[simd_test(enable = "avx512f,avx512vl")]
54852    fn test_mm_maskz_sra_epi32() {
54853        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
54854        let count = _mm_set_epi32(0, 0, 0, 1);
54855        let r = _mm_maskz_sra_epi32(0, a, count);
54856        assert_eq_m128i(r, _mm_setzero_si128());
54857        let r = _mm_maskz_sra_epi32(0b00001111, a, count);
54858        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
54859        assert_eq_m128i(r, e);
54860    }
54861
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm512_srav_epi32(a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        let r = _mm512_mask_srav_epi32(a, 0, a, count);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
        let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srav_epi32() {
        let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
        let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
        let r = _mm512_maskz_srav_epi32(0, a, count);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srav_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_mask_srav_epi32(a, 0, a, count);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srav_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let count = _mm256_set1_epi32(1);
        let r = _mm256_maskz_srav_epi32(0, a, count);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srav_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_mask_srav_epi32(a, 0, a, count);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srav_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let count = _mm_set1_epi32(1);
        let r = _mm_maskz_srav_epi32(0, a, count);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srav_epi32(0b00001111, a, count);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

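    // `srai` takes the shift count as a const generic immediate; arithmetic
    // immediate shifts of 32 or more fill each lane with its sign bit,
    // equivalent to shifting by 31.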
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
        let r = _mm512_srai_epi32::<2>(a);
        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
        let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_srai_epi32() {
        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
        let r = _mm512_maskz_srai_epi32::<2>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_srai_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_srai_epi32() {
        let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm256_maskz_srai_epi32::<1>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
        let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_srai_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_mask_srai_epi32::<1>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_srai_epi32() {
        let a = _mm_set_epi32(1 << 5, 0, 0, 0);
        let r = _mm_maskz_srai_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1 << 4, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

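    // `permute_ps` applies the same imm8 selector to every 128-bit lane;
    // 0b11_11_11_11 broadcasts element 3 of each lane to all four positions.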
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_permute_ps::<0b11_11_11_11>(a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_permute_ps() {
        let a = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_permute_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_permute_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_permute_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
        let e = _mm_set_ps(0., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_permute_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
        let e = _mm_set_ps(0., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

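    // Despite its name, `_mm512_permutevar_epi32` indexes across the full
    // vector (like `permutexvar`), so with every index set to 1 each output
    // lane receives element 1 of `a`.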
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutevar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_permutevar_epi32(idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutevar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }

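    // `permutevar_ps`, in contrast, permutes within each 128-bit lane,
    // consulting only the low two bits of each 32-bit index.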
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_permutevar_ps(a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutevar_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let b = _mm512_set1_epi32(0b01);
        let r = _mm512_maskz_permutevar_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutevar_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let b = _mm256_set1_epi32(0b01);
        let r = _mm256_maskz_permutevar_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
        let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_mask_permutevar_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_permutevar_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set1_epi32(0b01);
        let r = _mm_maskz_permutevar_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
    }

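    // `permutexvar` selects across all lanes of the vector; with the index
    // broadcast to 1, every output lane receives element 1 of `a`.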
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_permutexvar_epi32(idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_epi32(14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutexvar_epi32() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_permutexvar_epi32(idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutexvar_epi32() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
        let e = _mm256_set1_epi32(6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_permutexvar_ps(idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
        let e = _mm512_set1_ps(14.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutexvar_ps() {
        let idx = _mm512_set1_epi32(1);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_permutexvar_ps(0, idx, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_permutexvar_ps(idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutexvar_ps() {
        let idx = _mm256_set1_epi32(1);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_permutexvar_ps(0, idx, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
        let e = _mm256_set1_ps(6.);
        assert_eq_m256(r, e);
    }

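    // `permutex2var` treats `a` and `b` as one concatenated table: the bit
    // just above the lane-index bits selects `b` (bit 4 for 16 lanes, bit 3
    // for 8, bit 2 for 4), the bits below it pick the element, and any higher
    // index bits are ignored.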
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_permutex2var_epi32(a, idx, b);
        let e = _mm512_set_epi32(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
        let e = _mm512_set_epi32(
            14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
        assert_eq_m512i(r, e);
    }

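    // The `mask2` variants merge with `idx` rather than `a`: lanes whose mask
    // bit is clear keep the index value, which is why out-of-range indices
    // like 1000 survive in the expected result below.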
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask2_permutex2var_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1000, 1 << 4, 2000, 1 << 4,
            3000, 1 << 4, 4000, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_epi32(100);
        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
        assert_eq_m512i(r, idx);
        let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1000, 1 << 4, 2000, 1 << 4,
            3000, 1 << 4, 4000, 1 << 4,
            10, 100, 9, 100,
            8, 100, 7, 100,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_permutex2var_epi32(a, idx, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask2_permutex2var_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_epi32(100);
        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
        assert_eq_m256i(r, idx);
        let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
        let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_permutex2var_epi32(a, idx, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask2_permutex2var_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_epi32(100);
        let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
        assert_eq_m128i(r, idx);
        let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
        let e = _mm_set_epi32(2, 100, 1, 100);
        assert_eq_m128i(r, e);
    }

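    // The `ps` variants follow the same indexing rules; for `mask2` the
    // result merges with the bit pattern of `idx`, which is why the asserts
    // below go through `castsi512_ps`/`castsi256_ps`/`castsi128_ps`.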
    #[simd_test(enable = "avx512f")]
    fn test_mm512_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_permutex2var_ps(a, idx, b);
        let e = _mm512_set_ps(
            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
        let e = _mm512_set_ps(
            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask2_permutex2var_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        #[rustfmt::skip]
        let idx = _mm512_set_epi32(
            1, 1 << 4, 2, 1 << 4,
            3, 1 << 4, 4, 1 << 4,
            5, 1 << 4, 6, 1 << 4,
            7, 1 << 4, 8, 1 << 4,
        );
        let b = _mm512_set1_ps(100.);
        let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
        assert_eq_m512(r, _mm512_castsi512_ps(idx));
        let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
        let e = _mm512_set_ps(
            14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_permutex2var_ps(a, idx, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask2_permutex2var_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
        let b = _mm256_set1_ps(100.);
        let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
        assert_eq_m256(r, _mm256_castsi256_ps(idx));
        let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
        let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_permutex2var_ps(a, idx, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask2_permutex2var_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
        let b = _mm_set1_ps(100.);
        let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
        assert_eq_m128(r, _mm_castsi128_ps(idx));
        let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
        let e = _mm_set_ps(2., 100., 1., 100.);
        assert_eq_m128(r, e);
    }

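    // `_MM_PERM_AADD` encodes the imm8 0b00_00_11_11 (A = 0, D = 3): within
    // each 128-bit lane, destination elements 3..0 take source elements
    // 0, 0, 3, 3 respectively.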
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_epi32() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_epi32() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_epi32() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_shuffle_epi32() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_shuffle_epi32() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
        let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_shuffle_epi32() {
        let a = _mm_set_epi32(1, 4, 5, 8);
        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
        assert_eq_m128i(r, a);
        let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
        let e = _mm_set_epi32(8, 8, 1, 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_shuffle_epi32() {
        let a = _mm_set_epi32(1, 4, 5, 8);
        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
        let e = _mm_set_epi32(8, 8, 1, 1);
        assert_eq_m128i(r, e);
    }

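    // `shuffle_ps` builds each 128-bit lane from both inputs: the two low
    // destination elements come from `a` and the two high ones from `b`,
    // each chosen by a 2-bit field of the immediate.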
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_ps() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
        let e = _mm512_setr_ps(
            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_ps() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_ps(
            8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_ps() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_shuffle_ps() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_shuffle_ps() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
        let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_shuffle_ps() {
        let a = _mm_set_ps(1., 4., 5., 8.);
        let b = _mm_set_ps(2., 3., 6., 7.);
        let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
        let e = _mm_set_ps(7., 7., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_shuffle_ps() {
        let a = _mm_set_ps(1., 4., 5., 8.);
        let b = _mm_set_ps(2., 3., 6., 7.);
        let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
        let e = _mm_set_ps(7., 7., 1., 1.);
        assert_eq_m128(r, e);
    }

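    // `shuffle_i32x4`/`shuffle_f32x4` move whole 128-bit lanes: in the
    // 512-bit form the two low result lanes come from `a` and the two high
    // lanes from `b`, two immediate bits per lane; the 256-bit form uses one
    // immediate bit per half.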
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_i32x4() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_i32x4() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_i32x4() {
        let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_shuffle_i32x4() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm256_shuffle_i32x4::<0b00>(a, b);
        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_shuffle_i32x4() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_shuffle_i32x4() {
        let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
        let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_f32x4() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
        let e = _mm512_setr_ps(
            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_f32x4() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_ps(
            1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_f32x4() {
        let a = _mm512_setr_ps(
            1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
        );
        let b = _mm512_setr_ps(
            2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
        );
        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_shuffle_f32x4() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_shuffle_f32x4::<0b00>(a, b);
        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_shuffle_f32x4() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_shuffle_f32x4() {
        let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
        let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
        let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
        assert_eq_m256(r, e);
    }

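    // The extract tests pull out 128-bit lane 1 (elements 4..7 in memory
    // order); the masked forms then blend that lane against `src` or zero at
    // 32-bit granularity.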
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_extractf32x4_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_extractf32x4_ps::<1>(a);
        let e = _mm_setr_ps(5., 6., 7., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_extractf32x4_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let src = _mm_set1_ps(100.);
        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
        let e = _mm_setr_ps(5., 6., 7., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_extractf32x4_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
        let e = _mm_setr_ps(5., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_extractf32x4_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_extractf32x4_ps::<1>(a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_extractf32x4_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let src = _mm_set1_ps(100.);
        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_extractf32x4_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
        let e = _mm_set_ps(1., 2., 3., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_extracti32x4_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_extracti32x4_epi32::<1>(a);
        let e = _mm_setr_epi32(5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_extracti32x4_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let src = _mm_set1_epi32(100);
        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
        let e = _mm_setr_epi32(5, 6, 7, 8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_extracti32x4_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
        let e = _mm_setr_epi32(5, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_extracti32x4_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_extracti32x4_epi32::<1>(a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_extracti32x4_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let src = _mm_set1_epi32(100);
        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_extracti32x4_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
        let e = _mm_set_epi32(1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

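    // `moveldup` duplicates the even-indexed element of each adjacent pair
    // into both slots; `movehdup` duplicates the odd-indexed one.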
56099    #[simd_test(enable = "avx512f")]
56100    const fn test_mm512_moveldup_ps() {
56101        let a = _mm512_setr_ps(
56102            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56103        );
56104        let r = _mm512_moveldup_ps(a);
56105        let e = _mm512_setr_ps(
56106            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
56107        );
56108        assert_eq_m512(r, e);
56109    }
56110
56111    #[simd_test(enable = "avx512f")]
56112    const fn test_mm512_mask_moveldup_ps() {
56113        let a = _mm512_setr_ps(
56114            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56115        );
56116        let r = _mm512_mask_moveldup_ps(a, 0, a);
56117        assert_eq_m512(r, a);
56118        let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
56119        let e = _mm512_setr_ps(
56120            1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
56121        );
56122        assert_eq_m512(r, e);
56123    }
56124
56125    #[simd_test(enable = "avx512f")]
56126    const fn test_mm512_maskz_moveldup_ps() {
56127        let a = _mm512_setr_ps(
56128            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
56129        );
56130        let r = _mm512_maskz_moveldup_ps(0, a);
56131        assert_eq_m512(r, _mm512_setzero_ps());
56132        let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
56133        let e = _mm512_setr_ps(
56134            1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
56135        );
56136        assert_eq_m512(r, e);
56137    }
56138
56139    #[simd_test(enable = "avx512f,avx512vl")]
56140    const fn test_mm256_mask_moveldup_ps() {
56141        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56142        let r = _mm256_mask_moveldup_ps(a, 0, a);
56143        assert_eq_m256(r, a);
56144        let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
56145        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
56146        assert_eq_m256(r, e);
56147    }
56148
56149    #[simd_test(enable = "avx512f,avx512vl")]
56150    const fn test_mm256_maskz_moveldup_ps() {
56151        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
56152        let r = _mm256_maskz_moveldup_ps(0, a);
56153        assert_eq_m256(r, _mm256_setzero_ps());
56154        let r = _mm256_maskz_moveldup_ps(0b11111111, a);
56155        let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
56156        assert_eq_m256(r, e);
56157    }
56158
56159    #[simd_test(enable = "avx512f,avx512vl")]
56160    const fn test_mm_mask_moveldup_ps() {
56161        let a = _mm_set_ps(1., 2., 3., 4.);
56162        let r = _mm_mask_moveldup_ps(a, 0, a);
56163        assert_eq_m128(r, a);
56164        let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
56165        let e = _mm_set_ps(2., 2., 4., 4.);
56166        assert_eq_m128(r, e);
56167    }
56168
56169    #[simd_test(enable = "avx512f,avx512vl")]
56170    const fn test_mm_maskz_moveldup_ps() {
56171        let a = _mm_set_ps(1., 2., 3., 4.);
56172        let r = _mm_maskz_moveldup_ps(0, a);
56173        assert_eq_m128(r, _mm_setzero_ps());
56174        let r = _mm_maskz_moveldup_ps(0b00001111, a);
56175        let e = _mm_set_ps(2., 2., 4., 4.);
56176        assert_eq_m128(r, e);
56177    }
56178
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_movehdup_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_movehdup_ps(a);
        let e = _mm512_setr_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_movehdup_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_mask_movehdup_ps(a, 0, a);
        assert_eq_m512(r, a);
        let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
        let e = _mm512_setr_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_movehdup_ps() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let r = _mm512_maskz_movehdup_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_movehdup_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_mask_movehdup_ps(a, 0, a);
        assert_eq_m256(r, a);
        let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_movehdup_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let r = _mm256_maskz_movehdup_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_movehdup_ps(0b11111111, a);
        let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_movehdup_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_mask_movehdup_ps(a, 0, a);
        assert_eq_m128(r, a);
        let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
        let e = _mm_set_ps(1., 1., 3., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_movehdup_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let r = _mm_maskz_movehdup_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_movehdup_ps(0b00001111, a);
        let e = _mm_set_ps(1., 1., 3., 3.);
        assert_eq_m128(r, e);
    }

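    // `vinserti32x4` copies `a` and overwrites the 128-bit lane selected by the
    // const generic (lane 0 covers bits 0..128) with `b`.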
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_inserti32x4() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_setr_epi32(17, 18, 19, 20);
        let r = _mm512_inserti32x4::<0>(a, b);
        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_inserti32x4() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_setr_epi32(17, 18, 19, 20);
        let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_inserti32x4() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm_setr_epi32(17, 18, 19, 20);
        let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
        let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_inserti32x4() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_inserti32x4::<1>(a, b);
        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_inserti32x4() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_inserti32x4() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
        let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
        assert_eq_m256i(r, e);
    }

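    // `vinsertf32x4` is the f32 counterpart of `vinserti32x4`: it replaces one
    // 128-bit lane of `a`, selected by the const generic, with `b`.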
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_insertf32x4() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm_setr_ps(17., 18., 19., 20.);
        let r = _mm512_insertf32x4::<0>(a, b);
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_insertf32x4() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm_setr_ps(17., 18., 19., 20.);
        let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_insertf32x4() {
        let a = _mm512_setr_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm_setr_ps(17., 18., 19., 20.);
        let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_insertf32x4() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_insertf32x4::<1>(a, b);
        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_insertf32x4() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_insertf32x4() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
        let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
        assert_eq_m256(r, e);
    }

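    // Cast intrinsics only reinterpret registers. Widening casts leave the upper
    // lanes undefined, so those tests only compare the low part; the `zext`
    // variants guarantee zeroed upper lanes; and same-width casts such as
    // `_mm512_castps_pd` reinterpret raw bits (1.0f32 = 0x3f800000, so a pair of
    // those lanes reads back as the f64 with bit pattern 0x3f800000_3f800000).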
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_castps128_ps512() {
        let a = _mm_setr_ps(17., 18., 19., 20.);
        let r = _mm512_castps128_ps512(a);
        assert_eq_m128(_mm512_castps512_ps128(r), a);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_castps256_ps512() {
        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_castps256_ps512(a);
        assert_eq_m256(_mm512_castps512_ps256(r), a);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_zextps128_ps512() {
        let a = _mm_setr_ps(17., 18., 19., 20.);
        let r = _mm512_zextps128_ps512(a);
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_zextps256_ps512() {
        let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm512_zextps256_ps512(a);
        let e = _mm512_setr_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_castps512_ps128() {
        let a = _mm512_setr_ps(
            17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        let r = _mm512_castps512_ps128(a);
        let e = _mm_setr_ps(17., 18., 19., 20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_castps512_ps256() {
        let a = _mm512_setr_ps(
            17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
        );
        let r = _mm512_castps512_ps256(a);
        let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_castps_pd() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_castps_pd(a);
        let e = _mm512_set1_pd(0.007812501848093234);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_castps_si512() {
        let a = _mm512_set1_ps(1.);
        let r = _mm512_castps_si512(a);
        let e = _mm512_set1_epi32(1065353216);
        assert_eq_m512i(r, e);
    }

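    // `vpbroadcastd` replicates the lowest 32-bit element of `a` (here 20, the
    // last argument of `_mm_set_epi32`) into every lane of the destination.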
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_broadcastd_epi32() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_broadcastd_epi32(a);
        let e = _mm512_set1_epi32(20);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_broadcastd_epi32() {
        let src = _mm512_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_mask_broadcastd_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
        let e = _mm512_set1_epi32(20);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_broadcastd_epi32() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_maskz_broadcastd_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
        let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_broadcastd_epi32() {
        let src = _mm256_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_mask_broadcastd_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
        let e = _mm256_set1_epi32(20);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_broadcastd_epi32() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_maskz_broadcastd_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
        let e = _mm256_set1_epi32(20);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_broadcastd_epi32() {
        let src = _mm_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_broadcastd_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
        let e = _mm_set1_epi32(20);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_broadcastd_epi32() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_broadcastd_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
        let e = _mm_set1_epi32(20);
        assert_eq_m128i(r, e);
    }

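    // `vbroadcastss` replicates the lowest f32 element of `a` into every lane.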
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_broadcastss_ps() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_broadcastss_ps(a);
        let e = _mm512_set1_ps(20.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_broadcastss_ps() {
        let src = _mm512_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_mask_broadcastss_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
        let e = _mm512_set1_ps(20.);
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_broadcastss_ps() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_maskz_broadcastss_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
        let e = _mm512_setr_ps(
            20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_broadcastss_ps() {
        let src = _mm256_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_mask_broadcastss_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
        let e = _mm256_set1_ps(20.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_broadcastss_ps() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_maskz_broadcastss_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
        let e = _mm256_set1_ps(20.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_broadcastss_ps() {
        let src = _mm_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_broadcastss_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
        let e = _mm_set1_ps(20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_broadcastss_ps() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_broadcastss_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_broadcastss_ps(0b00001111, a);
        let e = _mm_set1_ps(20.);
        assert_eq_m128(r, e);
    }

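    // `vbroadcasti32x4` repeats the whole 128-bit source vector across each
    // 128-bit lane of the destination.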
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_broadcast_i32x4() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_broadcast_i32x4(a);
        let e = _mm512_set_epi32(
            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_broadcast_i32x4() {
        let src = _mm512_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_mask_broadcast_i32x4(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
        let e = _mm512_set_epi32(
            17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_broadcast_i32x4() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm512_maskz_broadcast_i32x4(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_broadcast_i32x4() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_broadcast_i32x4(a);
        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_broadcast_i32x4() {
        let src = _mm256_set1_epi32(20);
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_mask_broadcast_i32x4(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_broadcast_i32x4() {
        let a = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm256_maskz_broadcast_i32x4(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
        let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
        assert_eq_m256i(r, e);
    }

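    // `vbroadcastf32x4` is the f32 counterpart: the four source floats are
    // repeated in every 128-bit lane.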
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_broadcast_f32x4() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_broadcast_f32x4(a);
        let e = _mm512_set_ps(
            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_broadcast_f32x4() {
        let src = _mm512_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_mask_broadcast_f32x4(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
        let e = _mm512_set_ps(
            17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_broadcast_f32x4() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm512_maskz_broadcast_f32x4(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_broadcast_f32x4() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_broadcast_f32x4(a);
        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_broadcast_f32x4() {
        let src = _mm256_set1_ps(20.);
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_mask_broadcast_f32x4(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_broadcast_f32x4() {
        let a = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm256_maskz_broadcast_f32x4(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
        let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
        assert_eq_m256(r, e);
    }

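    // `mask_blend` selects from `b` where the mask bit is set and from `a`
    // where it is clear; bit i of the mask controls element i.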
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_blend_epi32() {
        let a = _mm512_set1_epi32(1);
        let b = _mm512_set1_epi32(2);
        let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
        let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_blend_epi32() {
        let a = _mm256_set1_epi32(1);
        let b = _mm256_set1_epi32(2);
        let r = _mm256_mask_blend_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_blend_epi32() {
        let a = _mm_set1_epi32(1);
        let b = _mm_set1_epi32(2);
        let r = _mm_mask_blend_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_blend_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_set1_ps(2.);
        let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
        let e = _mm512_set_ps(
            2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_blend_ps() {
        let a = _mm256_set1_ps(1.);
        let b = _mm256_set1_ps(2.);
        let r = _mm256_mask_blend_ps(0b11111111, a, b);
        let e = _mm256_set1_ps(2.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_blend_ps() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let r = _mm_mask_blend_ps(0b00001111, a, b);
        let e = _mm_set1_ps(2.);
        assert_eq_m128(r, e);
    }

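    // `unpackhi` interleaves the upper halves of each 128-bit lane of `a` and
    // `b`, which is why the expected vectors mix elements from both inputs in
    // groups of four.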
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpackhi_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_unpackhi_epi32(a, b);
        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpackhi_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpackhi_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_unpackhi_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpackhi_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpackhi_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_unpackhi_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpackhi_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(17, 1, 18, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpackhi_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_unpackhi_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(17, 1, 18, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_unpackhi_ps(a, b);
        let e = _mm512_set_ps(
            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpackhi_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpackhi_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpackhi_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpackhi_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
        let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpackhi_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpackhi_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpackhi_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
        let e = _mm_set_ps(17., 1., 18., 2.);
        assert_eq_m128(r, e);
    }

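    // `unpacklo` is the mirror image of `unpackhi`: the lower halves of each
    // 128-bit lane of `a` and `b` are interleaved.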
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_unpacklo_epi32(a, b);
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpacklo_epi32() {
        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let b = _mm512_set_epi32(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        );
        let r = _mm512_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpacklo_epi32() {
        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
        let r = _mm256_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
        let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpacklo_epi32() {
        let a = _mm_set_epi32(1, 2, 3, 4);
        let b = _mm_set_epi32(17, 18, 19, 20);
        let r = _mm_maskz_unpacklo_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
        let e = _mm_set_epi32(19, 3, 20, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_unpacklo_ps(a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m512(r, a);
        let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_ps(
            19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_unpacklo_ps() {
        let a = _mm512_set_ps(
            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        );
        let b = _mm512_set_ps(
            17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        );
        let r = _mm512_maskz_unpacklo_ps(0, a, b);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m256(r, a);
        let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_unpacklo_ps() {
        let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
        let r = _mm256_maskz_unpacklo_ps(0, a, b);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
        let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_mask_unpacklo_ps(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_unpacklo_ps() {
        let a = _mm_set_ps(1., 2., 3., 4.);
        let b = _mm_set_ps(17., 18., 19., 20.);
        let r = _mm_maskz_unpacklo_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
        let e = _mm_set_ps(19., 3., 20., 4.);
        assert_eq_m128(r, e);
    }

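    // `valignd` concatenates `a` (high) and `b` (low), shifts the pair right by
    // IMM8 32-bit elements, and keeps the low half. The count is taken modulo
    // the element count, so `<0>` and `<16>` both return `b` for 512-bit vectors.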
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_alignr_epi32::<0>(a, b);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32::<16>(a, b);
        assert_eq_m512i(r, b);
        let r = _mm512_alignr_epi32::<1>(a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
        let e = _mm512_set_epi32(
            1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_alignr_epi32() {
        let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm512_set_epi32(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        );
        let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_alignr_epi32::<0>(a, b);
        assert_eq_m256i(r, b);
        let r = _mm256_alignr_epi32::<1>(a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_alignr_epi32() {
        let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
        let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
        let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
        let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_alignr_epi32::<0>(a, b);
        assert_eq_m128i(r, b);
        let r = _mm_alignr_epi32::<1>(a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_alignr_epi32() {
        let a = _mm_set_epi32(4, 3, 2, 1);
        let b = _mm_set_epi32(8, 7, 6, 5);
        let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
        let e = _mm_set_epi32(1, 8, 7, 6);
        assert_eq_m128i(r, e);
    }

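    // The bitwise tests below build their operands from shifted bits so the
    // expected AND/OR/XOR results can be read off per bit position, e.g.
    // `(1 << 1 | 1 << 3) & (1 << 3 | 1 << 4)` is `1 << 3`.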
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_epi32(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_and_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_and_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_and_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_and_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_and_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_and_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_and_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_and_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_and_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_and_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_and_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_and_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_and_si512(a, b);
        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
        assert_eq_m512i(r, e);
    }

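    // OR keeps every bit set in either operand, so the union of the shifted
    // bits above is expected unchanged where the mask allows it.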
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_epi32(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_mask_or_epi32(a, 0, a, b);
        assert_eq_m512i(r, a);
        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_or_epi32() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_maskz_or_epi32(0, a, b);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_or_epi32(a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_or_epi32(a, 0, a, b);
        assert_eq_m256i(r, a);
        let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_or_epi32() {
        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_maskz_or_epi32(0, a, b);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_or_epi32(0b11111111, a, b);
        let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_or_epi32(a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_or_epi32(a, 0, a, b);
        assert_eq_m128i(r, a);
        let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_or_epi32() {
        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_maskz_or_epi32(0, a, b);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_or_epi32(0b00001111, a, b);
        let e = _mm_set1_epi32(1 << 1 | 1 << 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_or_si512() {
        #[rustfmt::skip]
        let a = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3,
        );
        #[rustfmt::skip]
        let b = _mm512_set_epi32(
            1 << 1, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 3 | 1 << 4,
        );
        let r = _mm512_or_si512(a, b);
        #[rustfmt::skip]
        let e = _mm512_set_epi32(
            1 << 1 | 1 << 2, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
        );
        assert_eq_m512i(r, e);
    }

57562    #[simd_test(enable = "avx512f")]
57563    const fn test_mm512_xor_epi32() {
57564        #[rustfmt::skip]
57565        let a = _mm512_set_epi32(
57566            1 << 1 | 1 << 2, 0, 0, 0,
57567            0, 0, 0, 0,
57568            0, 0, 0, 0,
57569            0, 0, 0, 1 << 1 | 1 << 3,
57570        );
57571        #[rustfmt::skip]
57572        let b = _mm512_set_epi32(
57573            1 << 1, 0, 0, 0,
57574            0, 0, 0, 0,
57575            0, 0, 0, 0,
57576            0, 0, 0, 1 << 3 | 1 << 4,
57577        );
57578        let r = _mm512_xor_epi32(a, b);
57579        #[rustfmt::skip]
57580        let e = _mm512_set_epi32(
57581            1 << 2, 0, 0, 0,
57582            0, 0, 0, 0,
57583            0, 0, 0, 0,
57584            0, 0, 0, 1 << 1 | 1 << 4,
57585        );
57586        assert_eq_m512i(r, e);
57587    }
57588
57589    #[simd_test(enable = "avx512f")]
57590    const fn test_mm512_mask_xor_epi32() {
57591        #[rustfmt::skip]
57592        let a = _mm512_set_epi32(
57593            1 << 1 | 1 << 2, 0, 0, 0,
57594            0, 0, 0, 0,
57595            0, 0, 0, 0,
57596            0, 0, 0, 1 << 1 | 1 << 3,
57597        );
57598        #[rustfmt::skip]
57599        let b = _mm512_set_epi32(
57600            1 << 1, 0, 0, 0,
57601            0, 0, 0, 0,
57602            0, 0, 0, 0,
57603            0, 0, 0, 1 << 3 | 1 << 4,
57604        );
57605        let r = _mm512_mask_xor_epi32(a, 0, a, b);
57606        assert_eq_m512i(r, a);
57607        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
57608        #[rustfmt::skip]
57609        let e = _mm512_set_epi32(
57610            1 << 1 | 1 << 2, 0, 0, 0,
57611            0, 0, 0, 0,
57612            0, 0, 0, 0,
57613            0, 0, 0, 1 << 1 | 1 << 4,
57614        );
57615        assert_eq_m512i(r, e);
57616    }
57617
57618    #[simd_test(enable = "avx512f")]
57619    const fn test_mm512_maskz_xor_epi32() {
57620        #[rustfmt::skip]
57621        let a = _mm512_set_epi32(
57622            1 << 1 | 1 << 2, 0, 0, 0,
57623            0, 0, 0, 0,
57624            0, 0, 0, 0,
57625            0, 0, 0, 1 << 1 | 1 << 3,
57626        );
57627        #[rustfmt::skip]
57628        let b = _mm512_set_epi32(
57629            1 << 1, 0, 0, 0,
57630            0, 0, 0, 0,
57631            0, 0, 0, 0,
57632            0, 0, 0, 1 << 3 | 1 << 4,
57633        );
57634        let r = _mm512_maskz_xor_epi32(0, a, b);
57635        assert_eq_m512i(r, _mm512_setzero_si512());
57636        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
57637        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
57638        assert_eq_m512i(r, e);
57639    }
57640
57641    #[simd_test(enable = "avx512f,avx512vl")]
57642    const fn test_mm256_xor_epi32() {
57643        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57644        let b = _mm256_set1_epi32(1 << 1);
57645        let r = _mm256_xor_epi32(a, b);
57646        let e = _mm256_set1_epi32(1 << 2);
57647        assert_eq_m256i(r, e);
57648    }
57649
57650    #[simd_test(enable = "avx512f,avx512vl")]
57651    const fn test_mm256_mask_xor_epi32() {
57652        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57653        let b = _mm256_set1_epi32(1 << 1);
57654        let r = _mm256_mask_xor_epi32(a, 0, a, b);
57655        assert_eq_m256i(r, a);
57656        let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
57657        let e = _mm256_set1_epi32(1 << 2);
57658        assert_eq_m256i(r, e);
57659    }
57660
57661    #[simd_test(enable = "avx512f,avx512vl")]
57662    const fn test_mm256_maskz_xor_epi32() {
57663        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57664        let b = _mm256_set1_epi32(1 << 1);
57665        let r = _mm256_maskz_xor_epi32(0, a, b);
57666        assert_eq_m256i(r, _mm256_setzero_si256());
57667        let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
57668        let e = _mm256_set1_epi32(1 << 2);
57669        assert_eq_m256i(r, e);
57670    }
57671
57672    #[simd_test(enable = "avx512f,avx512vl")]
57673    const fn test_mm_xor_epi32() {
57674        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57675        let b = _mm_set1_epi32(1 << 1);
57676        let r = _mm_xor_epi32(a, b);
57677        let e = _mm_set1_epi32(1 << 2);
57678        assert_eq_m128i(r, e);
57679    }
57680
57681    #[simd_test(enable = "avx512f,avx512vl")]
57682    const fn test_mm_mask_xor_epi32() {
57683        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57684        let b = _mm_set1_epi32(1 << 1);
57685        let r = _mm_mask_xor_epi32(a, 0, a, b);
57686        assert_eq_m128i(r, a);
57687        let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
57688        let e = _mm_set1_epi32(1 << 2);
57689        assert_eq_m128i(r, e);
57690    }
57691
57692    #[simd_test(enable = "avx512f,avx512vl")]
57693    const fn test_mm_maskz_xor_epi32() {
57694        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57695        let b = _mm_set1_epi32(1 << 1);
57696        let r = _mm_maskz_xor_epi32(0, a, b);
57697        assert_eq_m128i(r, _mm_setzero_si128());
57698        let r = _mm_maskz_xor_epi32(0b00001111, a, b);
57699        let e = _mm_set1_epi32(1 << 2);
57700        assert_eq_m128i(r, e);
57701    }
57702
57703    #[simd_test(enable = "avx512f")]
57704    const fn test_mm512_xor_si512() {
57705        #[rustfmt::skip]
57706        let a = _mm512_set_epi32(
57707            1 << 1 | 1 << 2, 0, 0, 0,
57708            0, 0, 0, 0,
57709            0, 0, 0, 0,
57710            0, 0, 0, 1 << 1 | 1 << 3,
57711        );
57712        #[rustfmt::skip]
57713        let b = _mm512_set_epi32(
57714            1 << 1, 0, 0, 0,
57715            0, 0, 0, 0,
57716            0, 0, 0, 0,
57717            0, 0, 0, 1 << 3 | 1 << 4,
57718        );
57719        let r = _mm512_xor_si512(a, b);
57720        #[rustfmt::skip]
57721        let e = _mm512_set_epi32(
57722            1 << 2, 0, 0, 0,
57723            0, 0, 0, 0,
57724            0, 0, 0, 0,
57725            0, 0, 0, 1 << 1 | 1 << 4,
57726        );
57727        assert_eq_m512i(r, e);
57728    }
57729
57730    #[simd_test(enable = "avx512f")]
57731    const fn test_mm512_andnot_epi32() {
57732        let a = _mm512_set1_epi32(0);
57733        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
57734        let r = _mm512_andnot_epi32(a, b);
57735        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
57736        assert_eq_m512i(r, e);
57737    }
57738
57739    #[simd_test(enable = "avx512f")]
57740    const fn test_mm512_mask_andnot_epi32() {
57741        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
57742        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
57743        let r = _mm512_mask_andnot_epi32(a, 0, a, b);
57744        assert_eq_m512i(r, a);
57745        let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
57746        let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
57747        assert_eq_m512i(r, e);
57748    }
57749
57750    #[simd_test(enable = "avx512f")]
57751    const fn test_mm512_maskz_andnot_epi32() {
57752        let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
57753        let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
57754        let r = _mm512_maskz_andnot_epi32(0, a, b);
57755        assert_eq_m512i(r, _mm512_setzero_si512());
57756        let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
57757        #[rustfmt::skip]
57758        let e = _mm512_set_epi32(
57759            0, 0, 0, 0,
57760            0, 0, 0, 0,
57761            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
57762            1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
57763        );
57764        assert_eq_m512i(r, e);
57765    }
57766
57767    #[simd_test(enable = "avx512f,avx512vl")]
57768    const fn test_mm256_mask_andnot_epi32() {
57769        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57770        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
57771        let r = _mm256_mask_andnot_epi32(a, 0, a, b);
57772        assert_eq_m256i(r, a);
57773        let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
57774        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
57775        assert_eq_m256i(r, e);
57776    }
57777
57778    #[simd_test(enable = "avx512f,avx512vl")]
57779    const fn test_mm256_maskz_andnot_epi32() {
57780        let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
57781        let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
57782        let r = _mm256_maskz_andnot_epi32(0, a, b);
57783        assert_eq_m256i(r, _mm256_setzero_si256());
57784        let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
57785        let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
57786        assert_eq_m256i(r, e);
57787    }
57788
57789    #[simd_test(enable = "avx512f,avx512vl")]
57790    const fn test_mm_mask_andnot_epi32() {
57791        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57792        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
57793        let r = _mm_mask_andnot_epi32(a, 0, a, b);
57794        assert_eq_m128i(r, a);
57795        let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
57796        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
57797        assert_eq_m128i(r, e);
57798    }
57799
57800    #[simd_test(enable = "avx512f,avx512vl")]
57801    const fn test_mm_maskz_andnot_epi32() {
57802        let a = _mm_set1_epi32(1 << 1 | 1 << 2);
57803        let b = _mm_set1_epi32(1 << 3 | 1 << 4);
57804        let r = _mm_maskz_andnot_epi32(0, a, b);
57805        assert_eq_m128i(r, _mm_setzero_si128());
57806        let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
57807        let e = _mm_set1_epi32(1 << 3 | 1 << 4);
57808        assert_eq_m128i(r, e);
57809    }
57810
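    // The `_cvt*`, `_k*`, and `_mm512_k*` helpers below operate on 16-bit
    // opmask registers (`__mmask16`), which are plain `u16` values on the
    // Rust side, so the expected results can be written as binary literals.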
    #[simd_test(enable = "avx512f")]
    const fn test_cvtmask16_u32() {
        let a: __mmask16 = 0b11001100_00110011;
        let r = _cvtmask16_u32(a);
        let e: u32 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_cvtu32_mask16() {
        let a: u32 = 0b11001100_00110011;
        let r = _cvtu32_mask16(a);
        let e: __mmask16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kand() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b11001100_00110011;
        let r = _mm512_kand(a, b);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_kand_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b11001100_00110011;
        let r = _kand_mask16(a, b);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kor() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kor(a, b);
        let e: u16 = 0b11101110_00111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_kor_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kor_mask16(a, b);
        let e: u16 = 0b11101110_00111011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kxor() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kxor(a, b);
        let e: u16 = 0b11100010_00111000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_kxor_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kxor_mask16(a, b);
        let e: u16 = 0b11100010_00111000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_knot() {
        let a: u16 = 0b11001100_00110011;
        let r = _mm512_knot(a);
        let e: u16 = 0b00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_knot_mask16() {
        let a: u16 = 0b11001100_00110011;
        let r = _knot_mask16(a);
        let e: u16 = 0b00110011_11001100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kandn() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kandn(a, b);
        let e: u16 = 0b00100010_00001000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_kandn_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kandn_mask16(a, b);
        let e: u16 = 0b00100010_00001000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kxnor() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kxnor(a, b);
        let e: u16 = 0b00011101_11000111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_kxnor_mask16() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _kxnor_mask16(a, b);
        let e: u16 = 0b00011101_11000111;
        assert_eq!(r, e);
    }

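    // The `_kortest*` helpers compute `a | b` and report flag-style results:
    // the return value (or the `z` variant) is 1 only when the OR is all
    // zeros, and `all_ones` (or the `c` variant) is 1 only when the OR is all
    // ones. The operands below are bitwise complements, so the OR is 0xFFFF.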
    #[simd_test(enable = "avx512f")]
    const unsafe fn test_kortest_mask16_u8() {
        let a: __mmask16 = 0b0110100101101001;
        let b: __mmask16 = 0b1011011010110110;
        let mut all_ones: u8 = 0;
        let r = _kortest_mask16_u8(a, b, &mut all_ones);
        assert_eq!(r, 0);
        assert_eq!(all_ones, 1);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_kortestc_mask16_u8() {
        let a: __mmask16 = 0b0110100101101001;
        let b: __mmask16 = 0b1011011010110110;
        let r = _kortestc_mask16_u8(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_kortestz_mask16_u8() {
        let a: __mmask16 = 0b0110100101101001;
        let b: __mmask16 = 0b1011011010110110;
        let r = _kortestz_mask16_u8(a, b);
        assert_eq!(r, 0);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_kshiftli_mask16() {
        let a: __mmask16 = 0b1001011011000011;
        let r = _kshiftli_mask16::<3>(a);
        let e: __mmask16 = 0b1011011000011000;
        assert_eq!(r, e);

        let r = _kshiftli_mask16::<15>(a);
        let e: __mmask16 = 0b1000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask16::<16>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftli_mask16::<17>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_kshiftri_mask16() {
        let a: __mmask16 = 0b1010100100111100;
        let r = _kshiftri_mask16::<3>(a);
        let e: __mmask16 = 0b0001010100100111;
        assert_eq!(r, e);

        let r = _kshiftri_mask16::<15>(a);
        let e: __mmask16 = 0b0000000000000001;
        assert_eq!(r, e);

        let r = _kshiftri_mask16::<16>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);

        let r = _kshiftri_mask16::<17>(a);
        let e: __mmask16 = 0b0000000000000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_load_mask16() {
        let a: __mmask16 = 0b1001011011000011;
        let r = _load_mask16(&a);
        let e: __mmask16 = 0b1001011011000011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_store_mask16() {
        let a: __mmask16 = 0b0110100100111100;
        let mut r = 0;
        _store_mask16(&mut r, a);
        let e: __mmask16 = 0b0110100100111100;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kmov() {
        let a: u16 = 0b11001100_00110011;
        let r = _mm512_kmov(a);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_int2mask() {
        let a: i32 = 0b11001100_00110011;
        let r = _mm512_int2mask(a);
        let e: u16 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask2int() {
        let k1: __mmask16 = 0b11001100_00110011;
        let r = _mm512_mask2int(k1);
        let e: i32 = 0b11001100_00110011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kunpackb() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kunpackb(a, b);
        let e: u16 = 0b00110011_00001011;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kortestc() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kortestc(a, b);
        assert_eq!(r, 0);
        let b: u16 = 0b11111111_11111111;
        let r = _mm512_kortestc(a, b);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_kortestz() {
        let a: u16 = 0b11001100_00110011;
        let b: u16 = 0b00101110_00001011;
        let r = _mm512_kortestz(a, b);
        assert_eq!(r, 0);
        let r = _mm512_kortestz(0, 0);
        assert_eq!(r, 1);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_test_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_test_epi32_mask(a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_test_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_test_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm256_test_epi32_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_test_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm256_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_test_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm_test_epi32_mask(a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_test_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm_mask_test_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_testn_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
        let r = _mm512_testn_epi32_mask(a, b);
        let e: __mmask16 = 0b00000000_00000000;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_testn_epi32_mask() {
        let a = _mm512_set1_epi32(1 << 0);
        let b = _mm512_set1_epi32(1 << 1);
        let r = _mm512_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
        let e: __mmask16 = 0b11111111_11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_testn_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_testn_epi32_mask(a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_testn_epi32_mask() {
        let a = _mm256_set1_epi32(1 << 0);
        let b = _mm256_set1_epi32(1 << 1);
        let r = _mm256_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b11111111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_testn_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_testn_epi32_mask(a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_testn_epi32_mask() {
        let a = _mm_set1_epi32(1 << 0);
        let b = _mm_set1_epi32(1 << 1);
        let r = _mm_mask_testn_epi32_mask(0, a, b);
        assert_eq!(r, 0);
        let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
        let e: __mmask8 = 0b00001111;
        assert_eq!(r, e);
    }

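    // The `_mm512_stream_*` stores are non-temporal: they bypass the cache
    // hierarchy and are only guaranteed to be ordered before subsequent loads
    // after an `_mm_sfence()`, which is why each test fences before reading
    // the buffer back.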
    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm512_stream_ps() {
        #[repr(align(64))]
        struct Memory {
            pub data: [f32; 16], // 64 bytes
        }
        let a = _mm512_set1_ps(7.0);
        let mut mem = Memory { data: [-1.0; 16] };

        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
        _mm_sfence();
        for i in 0..16 {
            assert_eq!(mem.data[i], get_m512(a, i));
        }
    }

    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm512_stream_pd() {
        #[repr(align(64))]
        struct Memory {
            pub data: [f64; 8],
        }
        let a = _mm512_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 8] };

        _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
        _mm_sfence();
        for i in 0..8 {
            assert_eq!(mem.data[i], get_m512d(a, i));
        }
    }

    #[simd_test(enable = "avx512f")]
    #[cfg_attr(miri, ignore)]
    unsafe fn test_mm512_stream_si512() {
        #[repr(align(64))]
        struct Memory {
            pub data: [i64; 8],
        }
        let a = _mm512_set1_epi32(7);
        let mut mem = Memory { data: [-1; 8] };

        _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
        _mm_sfence();
        for i in 0..8 {
            assert_eq!(mem.data[i], get_m512i(a, i));
        }
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_stream_load_si512() {
        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
        assert_eq_m512i(a, r);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_add_epi32() {
        let a = _mm512_set1_epi32(1);
        let e: i32 = _mm512_reduce_add_epi32(a);
        assert_eq!(16, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_add_epi32() {
        let a = _mm512_set1_epi32(1);
        let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
        assert_eq!(8, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_add_ps() {
        let a = _mm512_set1_ps(1.);
        let e: f32 = _mm512_reduce_add_ps(a);
        assert_eq!(16., e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_add_ps() {
        let a = _mm512_set1_ps(1.);
        let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
        assert_eq!(8., e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_mul_epi32() {
        let a = _mm512_set1_epi32(2);
        let e: i32 = _mm512_reduce_mul_epi32(a);
        assert_eq!(65536, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_mul_epi32() {
        let a = _mm512_set1_epi32(2);
        let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
        assert_eq!(256, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_mul_ps() {
        let a = _mm512_set1_ps(2.);
        let e: f32 = _mm512_reduce_mul_ps(a);
        assert_eq!(65536., e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_mul_ps() {
        let a = _mm512_set1_ps(2.);
        let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
        assert_eq!(256., e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_max_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_reduce_max_epi32(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_max_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_max_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_reduce_max_epu32(a);
        assert_eq!(15, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_max_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
        assert_eq!(7, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_reduce_max_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_reduce_max_ps(a);
        assert_eq!(15., e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_reduce_max_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
        assert_eq!(7., e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_min_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_reduce_min_epi32(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_min_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_min_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_reduce_min_epu32(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_min_epu32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_reduce_min_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_reduce_min_ps(a);
        assert_eq!(0., e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_reduce_min_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
        assert_eq!(0., e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_and_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_reduce_and_epi32(a);
        assert_eq!(0, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_and_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_reduce_or_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_reduce_or_epi32(a);
        assert_eq!(3, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_reduce_or_epi32() {
        let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
        let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
        assert_eq!(1, e);
    }

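    // Compress gathers the elements selected by the mask and packs them into
    // the low lanes of the result; the remaining lanes come from `src` in the
    // mask variant and are zeroed in the maskz variant.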
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_compress_epi32() {
        let src = _mm512_set1_epi32(200);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_compress_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
        let e = _mm512_set_epi32(
            200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_compress_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_compress_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_compress_epi32() {
        let src = _mm256_set1_epi32(200);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_compress_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
        let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_compress_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_compress_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_compress_epi32(0b01010101, a);
        let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_compress_epi32() {
        let src = _mm_set1_epi32(200);
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_mask_compress_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_compress_epi32(src, 0b00000101, a);
        let e = _mm_set_epi32(200, 200, 1, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_compress_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_maskz_compress_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_compress_epi32(0b00000101, a);
        let e = _mm_set_epi32(0, 0, 1, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_compress_ps() {
        let src = _mm512_set1_ps(200.);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_compress_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
        let e = _mm512_set_ps(
            200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_compress_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_compress_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
        let e = _mm512_set_ps(
            0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_compress_ps() {
        let src = _mm256_set1_ps(200.);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_compress_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_compress_ps(src, 0b01010101, a);
        let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_compress_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_compress_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_compress_ps(0b01010101, a);
        let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_compress_ps() {
        let src = _mm_set1_ps(200.);
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_compress_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_compress_ps(src, 0b00000101, a);
        let e = _mm_set_ps(200., 200., 1., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_compress_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_compress_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_compress_ps(0b00000101, a);
        let e = _mm_set_ps(0., 0., 1., 3.);
        assert_eq_m128(r, e);
    }

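    // The compress-store tests write only `k.count_ones()` elements to
    // memory, contiguously from the base pointer; the tail of the buffer is
    // left untouched, which is what the trailing zeros below verify.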
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compressstoreu_epi32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let mut r = [0_i32; 16];
        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i32; 16]);
        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi32() {
        let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
        let mut r = [0_i32; 8];
        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i32; 8]);
        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = [0_i32; 4];
        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i32; 4]);
        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
        assert_eq!(&r, &[1, 2, 4, 0]);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compressstoreu_epi64() {
        let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
        let mut r = [0_i64; 8];
        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i64; 8]);
        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
        assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_epi64() {
        let a = _mm256_setr_epi64x(1, 2, 3, 4);
        let mut r = [0_i64; 4];
        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i64; 4]);
        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
        assert_eq!(&r, &[1, 2, 4, 0]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_epi64() {
        let a = _mm_setr_epi64x(1, 2);
        let mut r = [0_i64; 2];
        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_i64; 2]);
        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
        assert_eq!(&r, &[2, 0]);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compressstoreu_ps() {
        let a = _mm512_setr_ps(
            1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
            13_f32, 14_f32, 15_f32, 16_f32,
        );
        let mut r = [0_f32; 16];
        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_f32; 16]);
        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
        assert_eq!(
            &r,
            &[
                2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
                0_f32, 0_f32, 0_f32, 0_f32, 0_f32
            ]
        );
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_ps() {
        let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
        let mut r = [0_f32; 8];
        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0_f32; 8]);
        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
        assert_eq!(
            &r,
            &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
        );
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_ps() {
        let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
        let mut r = [0.; 4];
        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0.; 4]);
        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
        assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_compressstoreu_pd() {
        let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
        let mut r = [0.; 8];
        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0.; 8]);
        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
        assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_compressstoreu_pd() {
        let a = _mm256_setr_pd(1., 2., 3., 4.);
        let mut r = [0.; 4];
        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0.; 4]);
        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
        assert_eq!(&r, &[1., 2., 4., 0.]);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_compressstoreu_pd() {
        let a = _mm_setr_pd(1., 2.);
        let mut r = [0.; 2];
        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
        assert_eq!(&r, &[0.; 2]);
        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
        assert_eq!(&r, &[2., 0.]);
    }

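    // Expand is the inverse of compress: consecutive low elements of `a` are
    // scattered to the lanes selected by the mask, with the other lanes taken
    // from `src` (mask variant) or zeroed (maskz variant).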
    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expand_epi32() {
        let src = _mm512_set1_epi32(200);
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_mask_expand_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
        let e = _mm512_set_epi32(
            200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
        );
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expand_epi32() {
        let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm512_maskz_expand_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
        let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expand_epi32() {
        let src = _mm256_set1_epi32(200);
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_mask_expand_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
        let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expand_epi32() {
        let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm256_maskz_expand_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_expand_epi32(0b01010101, a);
        let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expand_epi32() {
        let src = _mm_set1_epi32(200);
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_mask_expand_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_expand_epi32(src, 0b00000101, a);
        let e = _mm_set_epi32(200, 2, 200, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expand_epi32() {
        let a = _mm_set_epi32(0, 1, 2, 3);
        let r = _mm_maskz_expand_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_expand_epi32(0b00000101, a);
        let e = _mm_set_epi32(0, 2, 0, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_mask_expand_ps() {
        let src = _mm512_set1_ps(200.);
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_mask_expand_ps(src, 0, a);
        assert_eq_m512(r, src);
        let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
        let e = _mm512_set_ps(
            200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm512_maskz_expand_ps() {
        let a = _mm512_set_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let r = _mm512_maskz_expand_ps(0, a);
        assert_eq_m512(r, _mm512_setzero_ps());
        let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
        let e = _mm512_set_ps(
            0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_mask_expand_ps() {
        let src = _mm256_set1_ps(200.);
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_mask_expand_ps(src, 0, a);
        assert_eq_m256(r, src);
        let r = _mm256_mask_expand_ps(src, 0b01010101, a);
        let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm256_maskz_expand_ps() {
        let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
        let r = _mm256_maskz_expand_ps(0, a);
        assert_eq_m256(r, _mm256_setzero_ps());
        let r = _mm256_maskz_expand_ps(0b01010101, a);
        let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_mask_expand_ps() {
        let src = _mm_set1_ps(200.);
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_mask_expand_ps(src, 0, a);
        assert_eq_m128(r, src);
        let r = _mm_mask_expand_ps(src, 0b00000101, a);
        let e = _mm_set_ps(200., 2., 200., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    fn test_mm_maskz_expand_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let r = _mm_maskz_expand_ps(0, a);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_expand_ps(0b00000101, a);
        let e = _mm_set_ps(0., 2., 0., 3.);
        assert_eq_m128(r, e);
    }

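    // The `loadu` intrinsics perform unaligned loads, so a plain slice is a
    // valid source; `black_box` keeps the compiler from constant-folding the
    // load away.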
    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr();
        let r = _mm512_loadu_epi32(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm256_loadu_epi32() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50];
        let p = a.as_ptr();
        let r = _mm256_loadu_epi32(black_box(p));
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_loadu_epi32() {
        let a = &[4, 3, 2, 5];
        let p = a.as_ptr();
        let r = _mm_loadu_epi32(black_box(p));
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }

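    // The `cvt*_storeu` tests downconvert each 32-bit lane (with truncation,
    // signed saturation for `cvtsepi32`, or unsigned saturation for
    // `cvtusepi32`) and store only the lanes selected by the mask.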
58855    #[simd_test(enable = "avx512f")]
58856    unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
58857        let a = _mm512_set1_epi32(9);
58858        let mut r = _mm256_undefined_si256();
58859        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
58860        let e = _mm256_set1_epi16(9);
58861        assert_eq_m256i(r, e);
58862    }
58863
58864    #[simd_test(enable = "avx512f,avx512vl")]
58865    unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
58866        let a = _mm256_set1_epi32(9);
58867        let mut r = _mm_undefined_si128();
58868        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
58869        let e = _mm_set1_epi16(9);
58870        assert_eq_m128i(r, e);
58871    }
58872
58873    #[simd_test(enable = "avx512f,avx512vl")]
58874    unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
58875        let a = _mm_set1_epi32(9);
58876        let mut r = _mm_set1_epi8(0);
58877        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
58878        let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
58879        assert_eq_m128i(r, e);
58880    }
58881
58882    #[simd_test(enable = "avx512f")]
58883    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
58884        let a = _mm512_set1_epi32(i32::MAX);
58885        let mut r = _mm256_undefined_si256();
58886        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
58887        let e = _mm256_set1_epi16(i16::MAX);
58888        assert_eq_m256i(r, e);
58889    }
58890
58891    #[simd_test(enable = "avx512f,avx512vl")]
58892    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
58893        let a = _mm256_set1_epi32(i32::MAX);
58894        let mut r = _mm_undefined_si128();
58895        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
58896        let e = _mm_set1_epi16(i16::MAX);
58897        assert_eq_m128i(r, e);
58898    }
58899
58900    #[simd_test(enable = "avx512f,avx512vl")]
58901    unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
58902        let a = _mm_set1_epi32(i32::MAX);
58903        let mut r = _mm_set1_epi8(0);
58904        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
58905        let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
58906        assert_eq_m128i(r, e);
58907    }
58908
58909    #[simd_test(enable = "avx512f")]
58910    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
58911        let a = _mm512_set1_epi32(i32::MAX);
58912        let mut r = _mm256_undefined_si256();
58913        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
58914        let e = _mm256_set1_epi16(u16::MAX as i16);
58915        assert_eq_m256i(r, e);
58916    }
58917
58918    #[simd_test(enable = "avx512f,avx512vl")]
58919    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
58920        let a = _mm256_set1_epi32(i32::MAX);
58921        let mut r = _mm_undefined_si128();
58922        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
58923        let e = _mm_set1_epi16(u16::MAX as i16);
58924        assert_eq_m128i(r, e);
58925    }
58926
58927    #[simd_test(enable = "avx512f,avx512vl")]
58928    unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
58929        let a = _mm_set1_epi32(i32::MAX);
58930        let mut r = _mm_set1_epi8(0);
58931        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
58932        let e = _mm_set_epi16(
58933            0,
58934            0,
58935            0,
58936            0,
58937            u16::MAX as i16,
58938            u16::MAX as i16,
58939            u16::MAX as i16,
58940            u16::MAX as i16,
58941        );
58942        assert_eq_m128i(r, e);
58943    }
58944
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(9);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(i8::MAX);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            i8::MAX, i8::MAX, i8::MAX, i8::MAX,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
        let a = _mm512_set1_epi32(i32::MAX);
        let mut r = _mm_undefined_si128();
        _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
        let e = _mm_set1_epi8(u8::MAX as i8);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
        let a = _mm256_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
        let a = _mm_set1_epi32(i32::MAX);
        let mut r = _mm_set1_epi8(0);
        _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
        #[rustfmt::skip]
        let e = _mm_set_epi8(
            0, 0, 0, 0,
            0, 0, 0, 0,
            0, 0, 0, 0,
            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_storeu_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm256_storeu_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_storeu_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_loadu_si512() {
        let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
        let p = a.as_ptr().cast();
        let r = _mm512_loadu_si512(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_storeu_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_storeu_si512(&mut r as *mut _, a);
        assert_eq_m512i(r, a);
    }

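    // The aligned `load`/`store` intrinsics below require their pointer to be
    // 64-byte (512-bit) aligned, which the `#[repr(align(64))]` wrapper
    // guarantees; the `loadu`/`storeu` variants above accept any alignment.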
    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_load_si512() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr().cast();
        let r = _mm512_load_si512(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_store_si512() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_store_si512(&mut r as *mut _, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 16], // 64 bytes
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
        };
        let p = (a.data).as_ptr();
        let r = _mm512_load_epi32(black_box(p));
        let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm256_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 8],
        }
        let a = Align {
            data: [4, 3, 2, 5, 8, 9, 64, 50],
        };
        let p = (a.data).as_ptr();
        let r = _mm256_load_epi32(black_box(p));
        let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_load_epi32() {
        #[repr(align(64))]
        struct Align {
            data: [i32; 4],
        }
        let a = Align { data: [4, 3, 2, 5] };
        let p = (a.data).as_ptr();
        let r = _mm_load_epi32(black_box(p));
        let e = _mm_setr_epi32(4, 3, 2, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_store_epi32() {
        let a = _mm512_set1_epi32(9);
        let mut r = _mm512_undefined_epi32();
        _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m512i(r, a);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm256_store_epi32() {
        let a = _mm256_set1_epi32(9);
        let mut r = _mm256_undefined_si256();
        _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m256i(r, a);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const unsafe fn test_mm_store_epi32() {
        let a = _mm_set1_epi32(9);
        let mut r = _mm_undefined_si128();
        _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_load_ps() {
        #[repr(align(64))]
        struct Align {
            data: [f32; 16], // 64 bytes
        }
        let a = Align {
            data: [
                4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
            ],
        };
        let p = (a.data).as_ptr();
        let r = _mm512_load_ps(black_box(p));
        let e = _mm512_setr_ps(
            4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const unsafe fn test_mm512_store_ps() {
        let a = _mm512_set1_ps(9.);
        let mut r = _mm512_undefined_ps();
        _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
        assert_eq_m512(r, a);
    }

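    // `mask_set1` broadcasts the scalar into the lanes whose mask bit is set
    // and takes the remaining lanes from `src` (or zeroes them, in the `maskz`
    // form); a mask of 0 therefore returns `src` (or all-zero) unchanged.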
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_set1_epi32() {
        let src = _mm512_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm512_mask_set1_epi32(src, 0, a);
        assert_eq_m512i(r, src);
        let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm512_maskz_set1_epi32(0, a);
        assert_eq_m512i(r, _mm512_setzero_si512());
        let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
        let e = _mm512_set1_epi32(11);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_mask_set1_epi32() {
        let src = _mm256_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm256_mask_set1_epi32(src, 0, a);
        assert_eq_m256i(r, src);
        let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm256_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm256_maskz_set1_epi32(0, a);
        assert_eq_m256i(r, _mm256_setzero_si256());
        let r = _mm256_maskz_set1_epi32(0b11111111, a);
        let e = _mm256_set1_epi32(11);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_mask_set1_epi32() {
        let src = _mm_set1_epi32(2);
        let a: i32 = 11;
        let r = _mm_mask_set1_epi32(src, 0, a);
        assert_eq_m128i(r, src);
        let r = _mm_mask_set1_epi32(src, 0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    const fn test_mm_maskz_set1_epi32() {
        let a: i32 = 11;
        let r = _mm_maskz_set1_epi32(0, a);
        assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_set1_epi32(0b00001111, a);
        let e = _mm_set1_epi32(11);
        assert_eq_m128i(r, e);
    }

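    // The masked scalar (`_ss`/`_sd`) operations below only consult bit 0 of
    // the mask: the low lane comes from `b` (or from `src`/zero when the bit
    // is clear) and the upper lanes are always copied from `a`. Masks such as
    // 0b11111111 therefore behave identically to 0b00000001 here.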
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_move_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_move_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_move_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_move_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_move_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_move_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 40.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_move_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_move_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_move_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_move_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_move_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_move_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 4.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_add_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_add_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_add_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_add_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_add_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_add_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_add_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_add_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_add_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_add_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_add_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_add_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_sub_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_sub_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_sub_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_sub_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sub_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_sub_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sub_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_sub_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sub_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sub_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_mul_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_mul_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_mul_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_mul_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_mul_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_mul_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_mul_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_mul_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_mul_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_mul_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_div_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_div_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_div_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_div_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_div_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_div_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_div_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_div_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_div_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_div_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_div_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_div_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_ss(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_ss(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_sd(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_sd(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_sd(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

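    // `rsqrt14`/`rcp14` compute approximate reciprocals with a maximum
    // relative error of 2^-14. The low-lane operands below are powers of two
    // (4.0), for which the approximations come out exact (0.5 and 0.25), so
    // the tests can still compare results exactly.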
    #[simd_test(enable = "avx512f")]
    fn test_mm_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_rsqrt14_ss(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_rsqrt14_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_rsqrt14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_rsqrt14_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_rsqrt14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_rsqrt14_sd(a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_rsqrt14_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_rsqrt14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_rsqrt14_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_rcp14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_rcp14_ss(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_rcp14_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_rcp14_ss(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_rcp14_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_rcp14_ss(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_rcp14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_rcp14_sd(a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_rcp14_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_rcp14_sd(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_rcp14_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_rcp14_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.25);
        assert_eq_m128d(r, e);
    }

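    // `getexp` extracts the unbiased exponent, i.e. floor(log2(|x|)), as a
    // float: getexp(3.0) = 1.0, since 3.0 = 1.5 * 2^1.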
    #[simd_test(enable = "avx512f")]
    fn test_mm_getexp_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_getexp_ss(a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getexp_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_ss(a, 0, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getexp_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_ss(0, a, b);
        let e = _mm_set_ps(2., 2., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getexp_ss(0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_getexp_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_getexp_sd(a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getexp_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_getexp_sd(a, 0, a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getexp_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_getexp_sd(0, a, b);
        let e = _mm_set_pd(2., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getexp_sd(0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

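    // `getmant` with `_MM_MANT_NORM_1_2` normalizes the mantissa into the
    // interval [1, 2): 10.0 = 1.25 * 2^3, so the extracted mantissa is 1.25.
    // `_MM_MANT_SIGN_SRC` keeps the sign of the source value.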
    #[simd_test(enable = "avx512f")]
    fn test_mm_getmant_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getmant_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
        let e = _mm_set_ps(20., 20., 20., 20.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getmant_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
        let e = _mm_set_ps(20., 20., 20., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_getmant_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getmant_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
        let e = _mm_set_pd(20., 20.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getmant_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
        let e = _mm_set_pd(20., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

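    // `roundscale` rounds to the nearest multiple of 2^-M, where M is taken
    // from the upper four bits of the immediate; IMM8 = 0 therefore rounds to
    // the nearest integer (ties to even), turning 1.1 into 1.0.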
    #[simd_test(enable = "avx512f")]
    fn test_mm_roundscale_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_roundscale_ss::<0>(a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_roundscale_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_roundscale_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_roundscale_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_roundscale_sd::<0>(a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_roundscale_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
        let e = _mm_set_pd(2.2, 2.2);
        assert_eq_m128d(r, e);
        let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_roundscale_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
        let e = _mm_set_pd(2.2, 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

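    // `scalef` computes a * 2^floor(b): with a = 1.0 and b = 3.0 the low lane
    // becomes 1.0 * 2^3 = 8.0.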
    #[simd_test(enable = "avx512f")]
    fn test_mm_scalef_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_ss(a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_scalef_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_scalef_ss(a, 0, a, b);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
        let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_scalef_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_scalef_ss(0, a, b);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_scalef_ss(0b11111111, a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_scalef_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_scalef_sd(a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_scalef_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_scalef_sd(a, 0, a, b);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_scalef_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_scalef_sd(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_scalef_sd(0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

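    // Scalar fused multiply-add tests. With a = 1, b = 2, c = 3 the low lane
    // is: fmadd = a*b+c = 5, fmsub = a*b-c = -1, fnmadd = -(a*b)+c = 1, and
    // fnmsub = -(a*b)-c = -5. `mask` variants fall back to the first operand,
    // `maskz` variants zero the low lane, and `mask3` variants take both the
    // fallback and the upper lanes from `c`.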
    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmadd_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmadd_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmadd_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmadd_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmadd_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmadd_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmsub_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmsub_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmsub_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmsub_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmsub_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmsub_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fnmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmadd_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fnmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmadd_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fnmadd_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fnmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmadd_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fnmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmadd_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fnmadd_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fnmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmsub_ss(a, 0, b, c);
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fnmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmsub_ss(0, a, b, c);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fnmsub_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
        let e = _mm_set_ps(3., 3., 3., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask_fnmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmsub_sd(a, 0, b, c);
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_maskz_fnmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmsub_sd(0, a, b, c);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_mask3_fnmsub_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
        let e = _mm_set_pd(3., -5.);
        assert_eq_m128d(r, e);
    }

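    // The `_round` variants take the rounding mode as a const generic;
    // `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC` selects truncation with
    // suppress-all-exceptions (SAE). All operands below produce exact results,
    // so the chosen rounding mode does not affect the expected values.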
    #[simd_test(enable = "avx512f")]
    fn test_mm_add_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_add_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_add_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 60.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_add_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_add_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_add_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 6.);
        assert_eq_m128d(r, e);
    }

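    // Low-lane subtraction: 20. - 40. = -20. (f32) and 2. - 4. = -2. (f64).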
    #[simd_test(enable = "avx512f")]
    fn test_mm_sub_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sub_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sub_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., -20.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_sub_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sub_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sub_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., -2.);
        assert_eq_m128d(r, e);
    }

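    // Low-lane multiplication: 20. * 40. = 800. (f32) and 2. * 4. = 8. (f64).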
    #[simd_test(enable = "avx512f")]
    fn test_mm_mul_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_mul_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_mul_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 800.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mul_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_mul_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_mul_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

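    // Low-lane division: 20. / 40. = 0.5 (f32) and 2. / 4. = 0.5 (f64).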
    #[simd_test(enable = "avx512f")]
    fn test_mm_div_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_div_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_div_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 40.);
        let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_div_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_div_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_div_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 0.5);
        assert_eq_m128d(r, e);
    }

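    // max/min are exact operations, so these tests pass
    // _MM_FROUND_CUR_DIRECTION (the current MXCSR rounding mode):
    // max(3., 7.) = 7. and min(3., 7.) = 3. in the low lane.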
    #[simd_test(enable = "avx512f")]
    fn test_mm_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 7.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_max_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 3.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
        let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_round_ss() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(4., 5., 6., 7.);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(0., 1., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(0., 1., 2., 3.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_min_round_sd() {
        let a = _mm_set_pd(0., 1.);
        let b = _mm_set_pd(2., 3.);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(0., 1.);
        assert_eq_m128d(r, e);
    }

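    // Low-lane square root of `b`: sqrt(4.) = 2.; the upper lanes come from `a`.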
    #[simd_test(enable = "avx512f")]
    fn test_mm_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_round_ss() {
        let src = _mm_set_ps(10., 11., 100., 110.);
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_ps(1., 2., 10., 110.);
        assert_eq_m128(r, e);
        let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_round_ss() {
        let a = _mm_set_ps(1., 2., 10., 20.);
        let b = _mm_set_ps(3., 4., 30., 4.);
        let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 2., 10., 0.);
        assert_eq_m128(r, e);
        let r =
            _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_ps(1., 2., 10., 2.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_sqrt_round_sd() {
        let src = _mm_set_pd(10., 11.);
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
        let e = _mm_set_pd(1., 11.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            src, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_sqrt_round_sd() {
        let a = _mm_set_pd(1., 2.);
        let b = _mm_set_pd(3., 4.);
        let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r =
            _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
        let e = _mm_set_pd(1., 2.);
        assert_eq_m128d(r, e);
    }

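    // getexp returns floor(log2(|x|)) of `b`'s low lane as a float:
    // getexp(3.) = 1.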
    #[simd_test(enable = "avx512f")]
    fn test_mm_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2., 2., 2., 2.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getexp_round_ss() {
        let a = _mm_set1_ps(2.);
        let b = _mm_set1_ps(3.);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2., 2., 2., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2., 2., 2., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2., 2.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getexp_round_sd() {
        let a = _mm_set1_pd(2.);
        let b = _mm_set1_pd(3.);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2., 1.);
        assert_eq_m128d(r, e);
    }

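    // getmant with _MM_MANT_NORM_1_2/_MM_MANT_SIGN_SRC extracts the mantissa
    // of `b`'s low lane normalized to [1, 2) with the source sign:
    // 10. = 1.25 * 2^3, so the result is 1.25.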
    #[simd_test(enable = "avx512f")]
    fn test_mm_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r =
            _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_ps(20., 20., 20., 20.);
        assert_eq_m128(r, e);
        let r = _mm_mask_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getmant_round_ss() {
        let a = _mm_set1_ps(20.);
        let b = _mm_set1_ps(10.);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_ps(20., 20., 20., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_getmant_round_ss::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_ps(20., 20., 20., 1.25);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r =
            _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
                a, b,
            );
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0, a, b);
        let e = _mm_set_pd(20., 20.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(a, 0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_getmant_round_sd() {
        let a = _mm_set1_pd(20.);
        let b = _mm_set1_pd(10.);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0, a, b);
        let e = _mm_set_pd(20., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_getmant_round_sd::<
            _MM_MANT_NORM_1_2,
            _MM_MANT_SIGN_SRC,
            _MM_FROUND_CUR_DIRECTION,
        >(0b11111111, a, b);
        let e = _mm_set_pd(20., 1.25);
        assert_eq_m128d(r, e);
    }

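    // roundscale with IMM8 = 0 keeps zero fraction bits, i.e. rounds the low
    // lane of `b` to an integer: 1.1 becomes 1.0.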
    #[simd_test(enable = "avx512f")]
    fn test_mm_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
        assert_eq_m128(r, e);
        let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_roundscale_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        let e = _mm_set_pd(2.2, 2.2);
        assert_eq_m128d(r, e);
        let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_roundscale_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(2.2, 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(2.2, 1.0);
        assert_eq_m128d(r, e);
    }

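    // scalef computes a * 2^floor(b) on the low lane: 1. * 2^3 = 8.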
    #[simd_test(enable = "avx512f")]
    fn test_mm_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
        let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_scalef_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(3.);
        let r =
            _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(1., 1., 1., 8.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, a, b,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
        let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_scalef_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(3.);
        let r =
            _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_pd(1., 8.);
        assert_eq_m128d(r, e);
    }

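    // Fused multiply-add family on the low lane with a = 1., b = 2., c = 3.:
    // fmadd = a*b + c = 5., fmsub = a*b - c = -1., fnmadd = -(a*b) + c = 1.,
    // fnmsub = -(a*b) - c = -5. The mask3 variants take the pass-through
    // lanes (and the masked-off low lane) from `c` instead of `a`.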
    #[simd_test(enable = "avx512f")]
    fn test_mm_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmadd_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., 1.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmadd_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., 1.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128(r, a);
        let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_ps(1., 1., 1., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmsub_round_ss() {
        let a = _mm_set1_ps(1.);
        let b = _mm_set1_ps(2.);
        let c = _mm_set1_ps(3.);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128(r, c);
        let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_ps(3., 3., 3., -5.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0, b, c,
        );
        assert_eq_m128d(r, a);
        let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0, a, b, c,
        );
        let e = _mm_set_pd(1., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b, c,
        );
        let e = _mm_set_pd(1., -5.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask3_fnmsub_round_sd() {
        let a = _mm_set1_pd(1.);
        let b = _mm_set1_pd(2.);
        let c = _mm_set1_pd(3.);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0,
        );
        assert_eq_m128d(r, c);
        let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
            a, b, c, 0b11111111,
        );
        let e = _mm_set_pd(3., -5.);
        assert_eq_m128d(r, e);
    }

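    // fixupimm classifies `b`'s low lane and looks up a 4-bit response token
    // in `c` (0x7FFFFFFF here); per the VFIXUPIMM token table the selected
    // response replaces the NaN low lane with -0.0.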
    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_ss::<5>(a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_sd::<5>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_round_ss() {
        let a = _mm_set_ps(1., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_ps(1., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_round_ss() {
        let a = _mm_set_ps(0., 0., 0., f32::NAN);
        let b = _mm_set1_ps(f32::MAX);
        let c = _mm_set1_epi32(i32::MAX);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_ps(0., 0., 0., 0.0);
        assert_eq_m128(r, e);
        let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_ps(0., 0., 0., -0.0);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_fixupimm_round_sd() {
        let a = _mm_set_pd(0., f64::NAN);
        let b = _mm_set1_pd(f64::MAX);
        let c = _mm_set1_epi64x(i32::MAX as i64);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
        let e = _mm_set_pd(0., 0.0);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
        let e = _mm_set_pd(0., -0.0);
        assert_eq_m128d(r, e);
    }

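    // Masked scalar conversions: the low lane of `b` is converted between f32
    // and f64 (here -1.5 and -7.5) while the upper lanes come from `a`; the
    // `_round` variants additionally take an explicit rounding/SAE argument.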
    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvtss_sd(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvtss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvtss_sd(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvtsd_ss(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvtsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvtsd_ss(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

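    // The cvt_round* variants take the rounding behavior as a const parameter:
    // _MM_FROUND_CUR_DIRECTION follows the current MXCSR rounding mode, while
    // _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC truncates and suppresses exceptions.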
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
        assert_eq_m128d(r, a);
        let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvt_roundss_sd() {
        let a = _mm_set_pd(6., -7.5);
        let b = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
        let e = _mm_set_pd(6., 0.);
        assert_eq_m128d(r, e);
        let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
        let e = _mm_set_pd(6., -1.5);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_mask_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
        assert_eq_m128(r, a);
        let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            a, 0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_maskz_cvt_roundsd_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b = _mm_set_pd(6., -7.5);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
        let e = _mm_set_ps(0., -0.5, 1., 0.);
        assert_eq_m128(r, e);
        let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
            0b11111111, a, b,
        );
        let e = _mm_set_ps(0., -0.5, 1., -7.5);
        assert_eq_m128(r, e);
    }

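    // Scalar float-to-integer conversions: rounding toward zero turns -1.5 into -1,
    // while the default round-to-nearest-even mode gives -2. A negative input is out
    // of range for u32 and produces the integer indefinite value, u32::MAX.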
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_si32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_i32(a);
        let e: i32 = -2;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

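    // Integer-to-scalar-float conversions replace the low element with the converted
    // value and copy the upper elements from `a`; 9 is exactly representable, so the
    // requested rounding mode does not affect the result.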
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundsi32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvt_roundu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvti32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvti32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: i32 = 9;
        let r = _mm_cvti32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }

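    // The cvtt* conversions always truncate toward zero regardless of MXCSR, so -1.5
    // converts to -1; negative inputs are again out of range for u32.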
    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundss_si32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttss_i32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttss_u32() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let r = _mm_cvttss_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundsd_si32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvtt_roundsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttsd_i32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_i32(a);
        let e: i32 = -1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_cvttsd_u32() {
        let a = _mm_set_pd(1., -1.5);
        let r = _mm_cvttsd_u32(a);
        let e: u32 = u32::MAX;
        assert_eq!(r, e);
    }

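    // Unsigned counterparts of the test_mm_cvti32_ss/test_mm_cvti32_sd tests above.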
    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvtu32_ss() {
        let a = _mm_set_ps(0., -0.5, 1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_ss(a, b);
        let e = _mm_set_ps(0., -0.5, 1., 9.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm_cvtu32_sd() {
        let a = _mm_set_pd(1., -1.5);
        let b: u32 = 9;
        let r = _mm_cvtu32_sd(a, b);
        let e = _mm_set_pd(1., 9.);
        assert_eq_m128d(r, e);
    }

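    // comi_round compares the low elements under the comparison predicate IMM5
    // (0 = equal, ordered, non-signaling) and returns the result as 0 or 1; here
    // 2.2 != 1.1, so both comparisons return 0.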
    #[simd_test(enable = "avx512f")]
    fn test_mm_comi_round_ss() {
        let a = _mm_set1_ps(2.2);
        let b = _mm_set1_ps(1.1);
        let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    fn test_mm_comi_round_sd() {
        let a = _mm_set1_pd(2.2);
        let b = _mm_set1_pd(1.1);
        let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
        let e: i32 = 0;
        assert_eq!(r, e);
    }

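    // These intrinsics extract the lowest element of a 512-bit vector as a scalar.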
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtsi512_si32() {
        let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm512_cvtsi512_si32(a);
        let e: i32 = 1;
        assert_eq!(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtss_f32() {
        let a = _mm512_setr_ps(
            312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
        );
        assert_eq!(_mm512_cvtss_f32(a), 312.0134);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_cvtsd_f64() {
        let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
        assert_eq!(r, -1.1);
    }

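    // shuffle_pd: each MASK bit selects the lower (0) or upper (1) element within a
    // 128-bit lane, with even destination elements drawn from `a` and odd ones from
    // `b`; 0b11_11_11_11 therefore picks the upper element of every pair.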
    #[simd_test(enable = "avx512f")]
    const fn test_mm512_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_mask_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m512d(r, a);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    const fn test_mm512_maskz_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
        assert_eq_m512d(r, _mm512_setzero_pd());
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

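    // Expand-loads read consecutive elements from memory and place them at the
    // positions of set mask bits; positions with a clear bit keep `src` (mask
    // variants) or are zeroed (maskz variants). Mask bits beyond the vector width
    // are simply ignored.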
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm_set_epi32(1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm_set_epi32(1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm256_set_epi64x(1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm256_set_epi64x(1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi64() {
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm_set_epi64x(42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm_set_epi64x(0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_ps() {
        let src = _mm512_set1_ps(42.);
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
}