uint32_t *p4 = s;
static const unsigned char bitsinbyte[256] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8};
- /* Count bits four bytes at a time */
- while(count>=4) {
- uint32_t aux = *p4++;
- count -= 4;
-#ifdef __GNUC__
- /* Unsigned int is always >= 4 bytes if compiler is GCC */
- bits += __builtin_popcount(aux);
-#else
- bits += bitsinbyte[aux&0xff] +
- bitsinbyte[(aux>>8)&0xff] +
- bitsinbyte[(aux>>16)&0xff] +
- bitsinbyte[(aux>>24)&0xff];
-#endif
+ /* Count bits 16 bytes at a time */
+ while(count>=16) {
+ uint32_t aux1, aux2, aux3, aux4;
+
+ aux1 = *p4++;
+ aux2 = *p4++;
+ aux3 = *p4++;
+ aux4 = *p4++;
+ count -= 16;
+
+ aux1 = aux1 - ((aux1 >> 1) & 0x55555555);
+ aux1 = (aux1 & 0x33333333) + ((aux1 >> 2) & 0x33333333);
+ aux2 = aux2 - ((aux2 >> 1) & 0x55555555);
+ aux2 = (aux2 & 0x33333333) + ((aux2 >> 2) & 0x33333333);
+ aux3 = aux3 - ((aux3 >> 1) & 0x55555555);
+ aux3 = (aux3 & 0x33333333) + ((aux3 >> 2) & 0x33333333);
+ aux4 = aux4 - ((aux4 >> 1) & 0x55555555);
+ aux4 = (aux4 & 0x33333333) + ((aux4 >> 2) & 0x33333333);
+ bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
+ ((((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
+ ((((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
+ ((((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24);
}
/* Count the remaining bytes */
p = (unsigned char*)p4;
char *opname = c->argv[1]->ptr;
robj *o, *targetkey = c->argv[2];
long op, j, numkeys;
+ robj **objects; /* Array of soruce objects. */
unsigned char **src; /* Array of source strings pointers. */
long *len, maxlen = 0; /* Array of length of src strings, and max len. */
+ long minlen = 0; /* Min len among the input keys. */
unsigned char *res = NULL; /* Resulting string. */
/* Parse the operation name. */
numkeys = c->argc - 3;
src = zmalloc(sizeof(unsigned char*) * numkeys);
len = zmalloc(sizeof(long) * numkeys);
+ objects = zmalloc(sizeof(robj*) * numkeys);
for (j = 0; j < numkeys; j++) {
o = lookupKeyRead(c->db,c->argv[j+3]);
/* Handle non-existing keys as empty strings. */
if (o == NULL) {
+ objects[j] = NULL;
src[j] = NULL;
len[j] = 0;
+ minlen = 0;
continue;
}
/* Return an error if one of the keys is not a string. */
if (checkType(c,o,REDIS_STRING)) {
+ for (j = j-1; j >= 0; j--) {
+ if (objects[j])
+ decrRefCount(objects[j]);
+ }
zfree(src);
zfree(len);
+ zfree(objects);
return;
}
- src[j] = o->ptr;
- len[j] = sdslen(o->ptr);
+ objects[j] = getDecodedObject(o);
+ src[j] = objects[j]->ptr;
+ len[j] = sdslen(objects[j]->ptr);
if (len[j] > maxlen) maxlen = len[j];
+ if (j == 0 || len[j] < minlen) minlen = len[j];
}
/* Compute the bit operation, if at least one string is not empty. */
unsigned char output, byte;
long i;
- for (j = 0; j < maxlen; j++) {
+ /* Fast path: as far as we have data for all the input bitmaps we
+ * can take a fast path that performs much better than the
+ * vanilla algorithm. */
+ j = 0;
+ if (minlen && numkeys <= 16) {
+ unsigned long *lp[16];
+ unsigned long *lres = (unsigned long*) res;
+
+ /* Note: sds pointer is always aligned to 8 byte boundary. */
+ memcpy(lp,src,sizeof(unsigned long*)*numkeys);
+ memcpy(res,src[0],minlen);
+
+ /* Different branches per different operations for speed (sorry). */
+ if (op == BITOP_AND) {
+ while(minlen >= sizeof(unsigned long)*4) {
+ for (i = 1; i < numkeys; i++) {
+ lres[0] &= lp[i][0];
+ lres[1] &= lp[i][1];
+ lres[2] &= lp[i][2];
+ lres[3] &= lp[i][3];
+ lp[i]+=4;
+ }
+ lres+=4;
+ j += sizeof(unsigned long)*4;
+ minlen -= sizeof(unsigned long)*4;
+ }
+ } else if (op == BITOP_OR) {
+ while(minlen >= sizeof(unsigned long)*4) {
+ for (i = 1; i < numkeys; i++) {
+ lres[0] |= lp[i][0];
+ lres[1] |= lp[i][1];
+ lres[2] |= lp[i][2];
+ lres[3] |= lp[i][3];
+ lp[i]+=4;
+ }
+ lres+=4;
+ j += sizeof(unsigned long)*4;
+ minlen -= sizeof(unsigned long)*4;
+ }
+ } else if (op == BITOP_XOR) {
+ while(minlen >= sizeof(unsigned long)*4) {
+ for (i = 1; i < numkeys; i++) {
+ lres[0] ^= lp[i][0];
+ lres[1] ^= lp[i][1];
+ lres[2] ^= lp[i][2];
+ lres[3] ^= lp[i][3];
+ lp[i]+=4;
+ }
+ lres+=4;
+ j += sizeof(unsigned long)*4;
+ minlen -= sizeof(unsigned long)*4;
+ }
+ } else if (op == BITOP_NOT) {
+ while(minlen >= sizeof(unsigned long)*4) {
+ lres[0] = ~lres[0];
+ lres[1] = ~lres[1];
+ lres[2] = ~lres[2];
+ lres[3] = ~lres[3];
+ lres+=4;
+ j += sizeof(unsigned long)*4;
+ minlen -= sizeof(unsigned long)*4;
+ }
+ }
+ }
+
+ /* j is set to the next byte to process by the previous loop. */
+ for (; j < maxlen; j++) {
output = (len[0] <= j) ? 0 : src[0][j];
if (op == BITOP_NOT) output = ~output;
for (i = 1; i < numkeys; i++) {
res[j] = output;
}
}
+ for (j = 0; j < numkeys; j++) {
+ if (objects[j])
+ decrRefCount(objects[j]);
+ }
zfree(src);
zfree(len);
+ zfree(objects);
/* Store the computed value into the target key */
if (maxlen) {