I am trying to broadcast a chosen element of a float64 vector across an entire vector. The code would look like the following. Note that "v_broadcast_element_f32" does not exist! How can we implement the same idea?
float64 a = 1;
float64 b = v_broadcast_element_f32(a, 0);
I am trying to broadcast a chosen element of a float64 vector across an entire vector. The code would look like the following. Note that "v_broadcast_element_f32" does not exist! How can we implement the same idea?
float64 a = 1;
float64 b = v_broadcast_element_f32(a, 0);
Here is a function that implements the broadcast:
// Broadcast one 32-bit element of a vector, selected by index N.
//
//   x : source vector of 32-bit elements.
//   N : element index. Only bits [5:0] are examined:
//         bits [2:0] - position inside a group (0-7)
//         bit  [3]   - group inside a dual group (0/1)
//         bits [5:4] - dual group (0-3)
//
// Returns the result of an in-dual-group shuffle followed by a predicated
// dual-group move; the intent is that the selected element ends up in every
// lane of the returned vector.
int64 v_broadcast_element_32bits(int64 x, int N)
{
    // Decode the three index fields up front.
    char elemInGroup = N & 0x7;          // bits [2:0]
    int  groupBit    = (N & 0x8) >> 3;   // bit  [3]
    int  dualGroup   = (N & 0x30) >> 4;  // bits [5:4]

    // Shuffle pattern: 0x80 is the enable bit, the low bits carry the
    // position of the element inside its group.
    uchar256 pattern = 0x80 + elemInGroup;

    // When group 1 is selected, additionally set the group-1 bit (0x20).
    // The add is predicated, so the pattern is left as-is for group 0.
    pattern = v_u8_add_b(pattern, 0x20, 0, pattern, (groupBit == 1));

    // Apply the shuffle; this is done in all dual groups at once.
    int64 picked = v_i32_shuffle_b(x, pattern, 0, x);

    // Copy the selected dual group over all dual groups. Exactly one of the
    // four predicated moves fires, because dualGroup takes a single value
    // in 0..3; the other three pass their income operand through.
    picked = v_i32_mov_dual_group_all_b(picked, 0xFFFFFFFF, 0, 0, 0, 0,
                                        MkWrA(0b11, 0b11, 0b11, 0b11),
                                        picked, (dualGroup == 0));
    picked = v_i32_mov_dual_group_all_b(picked, 0xFFFFFFFF, 1, 1, 1, 1,
                                        MkWrA(0b11, 0b11, 0b11, 0b11),
                                        picked, (dualGroup == 1));
    picked = v_i32_mov_dual_group_all_b(picked, 0xFFFFFFFF, 2, 2, 2, 2,
                                        MkWrA(0b11, 0b11, 0b11, 0b11),
                                        picked, (dualGroup == 2));
    picked = v_i32_mov_dual_group_all_b(picked, 0xFFFFFFFF, 3, 3, 3, 3,
                                        MkWrA(0b11, 0b11, 0b11, 0b11),
                                        picked, (dualGroup == 3));

    return picked;
}
// float64 front end for v_broadcast_element_32bits.
//
//   x : source float vector.
//   N : element index, forwarded unchanged to v_broadcast_element_32bits.
//
// The input is cast to int64, handed to the 32-bit broadcast routine, and the
// result is cast back to float64.
// NOTE(review): this relies on the vector casts reinterpreting the bits
// rather than converting values - confirm against the compiler's cast rules.
float64 v_broadcast_element_f32(float64 x, int N)
{
    int64 asInts = (int64)x;
    int64 spread = v_broadcast_element_32bits(asInts, N);
    return (float64)spread;
}