Skip to content
Snippets Groups Projects
Commit eec2317e authored by Peter Trommler's avatar Peter Trommler :drum:
Browse files

Efficient pdep implementation

parent c9884bcb
No related tags found
No related merge requests found
......@@ -4,40 +4,113 @@
/*
 * Parallel bit deposit, 64-bit.
 *
 * Scatters the low-order bits of src, in order, to the positions of the set
 * bits of mask: bit 0 of src goes to the lowest set bit of mask, bit 1 to the
 * next one, and so on. Result bits where mask is 0 are 0.
 *
 * Branch-free implementation following the "expand" routine from
 * Hacker's Delight, widened to 64 bits: log2(64) = 6 move masks are derived
 * from mask, then applied to src in reverse order.
 */
StgWord64
hs_pdep64(StgWord64 src, StgWord64 mask)
{
    uint64_t m0, mk, mp, mv, t;
    uint64_t array[6];          // move mask for shift distances 1, 2, 4, 8, 16, 32

    m0 = mask;                  // original mask, needed for the final clean-up
    mk = ~mask << 1;            // zero bits of mask, moved up by one position

    for (int i = 0; i < 6; i++) {
        // Prefix XOR by doubling: bit k of mp is the XOR of bits 0..k of mk.
        mp = mk ^ (mk << 1);
        mp = mp ^ (mp << 2);
        mp = mp ^ (mp << 4);
        mp = mp ^ (mp << 8);
        mp = mp ^ (mp << 16);
        mp = mp ^ (mp << 32);
        mv = mp & mask;         // mask bits that move by 2^i in this step
        array[i] = mv;
        mask = (mask ^ mv) | (mv >> (1 << i));  // compress mask by that move
        mk = mk & ~mp;
    }

    // Replay the recorded moves from the largest distance to the smallest,
    // shifting src bits left into the positions selected by each move mask.
    for (int i = 5; i >= 0; i--) {
        mv = array[i];
        t = src << (1 << i);
        src = (src & ~mv) | (t & mv);
    }

    return src & m0;            // drop everything outside the original mask
}
/*
 * Parallel bit deposit, 32-bit.
 *
 * Scatters the low-order bits of src, in order, to the positions of the set
 * bits in the low 32 bits of mask. Result bits where mask is 0 are 0, and the
 * result is confined to the low 32 bits (m0 is a uint32_t).
 *
 * Branch-free implementation following the "expand" routine from
 * Hacker's Delight: log2(32) = 5 move masks are derived from mask, then
 * applied to src in reverse order. Intermediate values wider than 32 bits are
 * truncated when stored into the uint32_t locals.
 */
StgWord
hs_pdep32(StgWord src, StgWord mask)
{
    uint32_t m0, mk, mp, mv, t;
    uint32_t array[5];          // move mask for shift distances 1, 2, 4, 8, 16

    m0 = mask;                  // original mask, needed for the final clean-up
    mk = ~mask << 1;            // zero bits of mask, moved up by one position

    for (int i = 0; i < 5; i++) {
        // Prefix XOR by doubling: bit k of mp is the XOR of bits 0..k of mk.
        mp = mk ^ (mk << 1);
        mp = mp ^ (mp << 2);
        mp = mp ^ (mp << 4);
        mp = mp ^ (mp << 8);
        mp = mp ^ (mp << 16);
        mv = mp & mask;         // mask bits that move by 2^i in this step
        array[i] = mv;
        mask = (mask ^ mv) | (mv >> (1 << i));  // compress mask by that move
        mk = mk & ~mp;
    }

    // Replay the recorded moves from the largest distance to the smallest,
    // shifting src bits left into the positions selected by each move mask.
    for (int i = 4; i >= 0; i--) {
        mv = array[i];
        t = src << (1 << i);
        src = (src & ~mv) | (t & mv);
    }

    return src & m0;            // drop everything outside the original mask
}
/*
 * Parallel bit deposit, 16-bit.
 *
 * Scatters the low-order bits of src, in order, to the positions of the set
 * bits in the low 16 bits of mask. Result bits where mask is 0 are 0, and the
 * result is confined to the low 16 bits (m0 is a uint16_t).
 *
 * Branch-free implementation following the "expand" routine from
 * Hacker's Delight: log2(16) = 4 move masks are derived from mask, then
 * applied to src in reverse order. Arithmetic on the uint16_t locals is done
 * after integer promotion and truncated again on assignment.
 */
StgWord
hs_pdep16(StgWord src, StgWord mask)
{
    uint16_t m0, mk, mp, mv, t;
    uint16_t array[4];          // move mask for shift distances 1, 2, 4, 8

    m0 = mask;                  // original mask, needed for the final clean-up
    mk = ~mask << 1;            // zero bits of mask, moved up by one position

    for (int i = 0; i < 4; i++) {
        // Prefix XOR by doubling: bit k of mp is the XOR of bits 0..k of mk.
        mp = mk ^ (mk << 1);
        mp = mp ^ (mp << 2);
        mp = mp ^ (mp << 4);
        mp = mp ^ (mp << 8);
        mv = mp & mask;         // mask bits that move by 2^i in this step
        array[i] = mv;
        mask = (mask ^ mv) | (mv >> (1 << i));  // compress mask by that move
        mk = mk & ~mp;
    }

    // Replay the recorded moves from the largest distance to the smallest,
    // shifting src bits left into the positions selected by each move mask.
    for (int i = 3; i >= 0; i--) {
        mv = array[i];
        t = src << (1 << i);
        src = (src & ~mv) | (t & mv);
    }

    return src & m0;            // drop everything outside the original mask
}
/*
 * Parallel bit deposit, 8-bit.
 *
 * Scatters the low-order bits of src, in order, to the positions of the set
 * bits in the low 8 bits of mask. Result bits where mask is 0 are 0, and the
 * result is confined to the low 8 bits (m0 is a uint8_t).
 *
 * Branch-free implementation following the "expand" routine from
 * Hacker's Delight: log2(8) = 3 move masks are derived from mask, then
 * applied to src in reverse order. Arithmetic on the uint8_t locals is done
 * after integer promotion and truncated again on assignment.
 */
StgWord
hs_pdep8(StgWord src, StgWord mask)
{
    uint8_t m0, mk, mp, mv, t;
    uint8_t array[3];           // move mask for shift distances 1, 2, 4

    m0 = mask;                  // original mask, needed for the final clean-up
    mk = ~mask << 1;            // zero bits of mask, moved up by one position

    for (int i = 0; i < 3; i++) {
        // Prefix XOR by doubling: bit k of mp is the XOR of bits 0..k of mk.
        mp = mk ^ (mk << 1);
        mp = mp ^ (mp << 2);
        mp = mp ^ (mp << 4);
        mv = mp & mask;         // mask bits that move by 2^i in this step
        array[i] = mv;
        mask = (mask ^ mv) | (mv >> (1 << i));  // compress mask by that move
        mk = mk & ~mp;
    }

    // Replay the recorded moves from the largest distance to the smallest,
    // shifting src bits left into the positions selected by each move mask.
    for (int i = 2; i >= 0; i--) {
        mv = array[i];
        t = src << (1 << i);
        src = (src & ~mv) | (t & mv);
    }

    return src & m0;            // drop everything outside the original mask
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment