X-Git-Url: https://git.saurik.com/redis.git/blobdiff_plain/e53a1fdd603370b3ac0cf3644253cfce5fac6740..a03c32702b87b99079db1c2083f023dd1f0fc863:/src/bitops.c diff --git a/src/bitops.c b/src/bitops.c index 053db02b..39d24ab7 100644 --- a/src/bitops.c +++ b/src/bitops.c @@ -34,19 +34,28 @@ long popcount(void *s, long count) { uint32_t *p4 = s; static const unsigned char bitsinbyte[256] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8}; - /* Count bits four bytes at a time */ - while(count>=4) { - uint32_t aux = *p4++; - count -= 4; -#ifdef __GNUC__ - /* Unsigned int is always >= 4 bytes if compiler is GCC */ - bits += __builtin_popcount(aux); -#else - bits += bitsinbyte[aux&0xff] + - bitsinbyte[(aux>>8)&0xff] + - bitsinbyte[(aux>>16)&0xff] + - bitsinbyte[(aux>>24)&0xff]; -#endif + /* Count bits 16 bytes at a time */ + while(count>=16) { + uint32_t aux1, aux2, aux3, aux4; + + aux1 = *p4++; + aux2 = *p4++; + aux3 = *p4++; + aux4 = *p4++; + count -= 16; + + aux1 = aux1 - ((aux1 >> 1) & 0x55555555); + aux1 = (aux1 & 0x33333333) + ((aux1 >> 2) & 0x33333333); + aux2 = aux2 - ((aux2 >> 1) & 0x55555555); + aux2 = (aux2 & 0x33333333) + ((aux2 >> 2) & 0x33333333); + aux3 = aux3 - ((aux3 >> 1) & 0x55555555); + aux3 = (aux3 & 0x33333333) + ((aux3 >> 2) & 0x33333333); + aux4 = aux4 - ((aux4 >> 1) & 0x55555555); + aux4 = (aux4 & 0x33333333) + ((aux4 >> 2) & 0x33333333); + bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) + + ((((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) + + ((((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) + + ((((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24); } /* Count the remaining bytes */ p = (unsigned char*)p4; @@ -105,14 +114,14 @@ void setbitCommand(redisClient *c) { o->ptr = sdsgrowzero(o->ptr,byte+1); /* Get current values */ - byteval = ((char*)o->ptr)[byte]; + byteval = ((uint8_t*)o->ptr)[byte]; bit = 7 - (bitoffset & 0x7); bitval = byteval & (1 << bit); /* Update byte with new bit value and return original value */ byteval &= ~(1 << bit); byteval |= ((on & 0x1) << bit); - ((char*)o->ptr)[byte] = byteval; + ((uint8_t*)o->ptr)[byte] = byteval; signalModifiedKey(c->db,c->argv[1]); server.dirty++; addReply(c, bitval ? shared.cone : shared.czero); @@ -139,7 +148,7 @@ void getbitCommand(redisClient *c) { bitval = llbuf[byte] & (1 << bit); } else { if (byte < sdslen(o->ptr)) - bitval = ((char*)o->ptr)[byte] & (1 << bit); + bitval = ((uint8_t*)o->ptr)[byte] & (1 << bit); } addReply(c, bitval ? shared.cone : shared.czero); @@ -150,8 +159,10 @@ void bitopCommand(redisClient *c) { char *opname = c->argv[1]->ptr; robj *o, *targetkey = c->argv[2]; long op, j, numkeys; + robj **objects; /* Array of soruce objects. */ unsigned char **src; /* Array of source strings pointers. */ long *len, maxlen = 0; /* Array of length of src strings, and max len. */ + long minlen = 0; /* Min len among the input keys. */ unsigned char *res = NULL; /* Resulting string. */ /* Parse the operation name. */ @@ -178,23 +189,33 @@ void bitopCommand(redisClient *c) { numkeys = c->argc - 3; src = zmalloc(sizeof(unsigned char*) * numkeys); len = zmalloc(sizeof(long) * numkeys); + objects = zmalloc(sizeof(robj*) * numkeys); for (j = 0; j < numkeys; j++) { o = lookupKeyRead(c->db,c->argv[j+3]); /* Handle non-existing keys as empty strings. */ if (o == NULL) { + objects[j] = NULL; src[j] = NULL; len[j] = 0; + minlen = 0; continue; } /* Return an error if one of the keys is not a string. */ if (checkType(c,o,REDIS_STRING)) { + for (j = j-1; j >= 0; j--) { + if (objects[j]) + decrRefCount(objects[j]); + } zfree(src); zfree(len); + zfree(objects); return; } - src[j] = o->ptr; - len[j] = sdslen(o->ptr); + objects[j] = getDecodedObject(o); + src[j] = objects[j]->ptr; + len[j] = sdslen(objects[j]->ptr); if (len[j] > maxlen) maxlen = len[j]; + if (j == 0 || len[j] < minlen) minlen = len[j]; } /* Compute the bit operation, if at least one string is not empty. */ @@ -203,7 +224,73 @@ void bitopCommand(redisClient *c) { unsigned char output, byte; long i; - for (j = 0; j < maxlen; j++) { + /* Fast path: as far as we have data for all the input bitmaps we + * can take a fast path that performs much better than the + * vanilla algorithm. */ + j = 0; + if (minlen && numkeys <= 16) { + unsigned long *lp[16]; + unsigned long *lres = (unsigned long*) res; + + /* Note: sds pointer is always aligned to 8 byte boundary. */ + memcpy(lp,src,sizeof(unsigned long*)*numkeys); + memcpy(res,src[0],minlen); + + /* Different branches per different operations for speed (sorry). */ + if (op == BITOP_AND) { + while(minlen >= sizeof(unsigned long)*4) { + for (i = 1; i < numkeys; i++) { + lres[0] &= lp[i][0]; + lres[1] &= lp[i][1]; + lres[2] &= lp[i][2]; + lres[3] &= lp[i][3]; + lp[i]+=4; + } + lres+=4; + j += sizeof(unsigned long)*4; + minlen -= sizeof(unsigned long)*4; + } + } else if (op == BITOP_OR) { + while(minlen >= sizeof(unsigned long)*4) { + for (i = 1; i < numkeys; i++) { + lres[0] |= lp[i][0]; + lres[1] |= lp[i][1]; + lres[2] |= lp[i][2]; + lres[3] |= lp[i][3]; + lp[i]+=4; + } + lres+=4; + j += sizeof(unsigned long)*4; + minlen -= sizeof(unsigned long)*4; + } + } else if (op == BITOP_XOR) { + while(minlen >= sizeof(unsigned long)*4) { + for (i = 1; i < numkeys; i++) { + lres[0] ^= lp[i][0]; + lres[1] ^= lp[i][1]; + lres[2] ^= lp[i][2]; + lres[3] ^= lp[i][3]; + lp[i]+=4; + } + lres+=4; + j += sizeof(unsigned long)*4; + minlen -= sizeof(unsigned long)*4; + } + } else if (op == BITOP_NOT) { + while(minlen >= sizeof(unsigned long)*4) { + lres[0] = ~lres[0]; + lres[1] = ~lres[1]; + lres[2] = ~lres[2]; + lres[3] = ~lres[3]; + lres+=4; + j += sizeof(unsigned long)*4; + minlen -= sizeof(unsigned long)*4; + } + } + } + + /* j is set to the next byte to process by the previous loop. */ + for (; j < maxlen; j++) { output = (len[0] <= j) ? 0 : src[0][j]; if (op == BITOP_NOT) output = ~output; for (i = 1; i < numkeys; i++) { @@ -217,8 +304,13 @@ void bitopCommand(redisClient *c) { res[j] = output; } } + for (j = 0; j < numkeys; j++) { + if (objects[j]) + decrRefCount(objects[j]); + } zfree(src); zfree(len); + zfree(objects); /* Store the computed value into the target key */ if (maxlen) { @@ -235,10 +327,9 @@ void bitopCommand(redisClient *c) { /* BITCOUNT key [start end] */ void bitcountCommand(redisClient *c) { robj *o; - long start, end; + long start, end, strlen; unsigned char *p; char llbuf[32]; - size_t strlen; /* Lookup, check for type, and return 0 for non existing keys. */ if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL || @@ -265,7 +356,7 @@ void bitcountCommand(redisClient *c) { if (end < 0) end = strlen+end; if (start < 0) start = 0; if (end < 0) end = 0; - if ((unsigned)end >= strlen) end = strlen-1; + if (end >= strlen) end = strlen-1; } else if (c->argc == 2) { /* The whole string. */ start = 0;