icuSources/tools/toolutil/denseranges.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 *******************************************************************************
   5 *   Copyright (C) 2010, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 *******************************************************************************
   8 *   file name:  denseranges.cpp
   9 *   encoding:   UTF-8
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2010sep25
  14 *   created by: Markus W. Scherer
  15 *
  16 * Helper code for finding a small number of dense ranges.
  17 */
  18
  19 #include "unicode/utypes.h"
  20 #include "denseranges.h"
  21
  22 // Definitions in the anonymous namespace are invisible outside this file.
  23 namespace {
  24
  25 /**
  26  * Collect up to 15 range gaps and sort them by ascending gap size.
  27  */
  28 class LargestGaps {
  29 public:
  30     LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {}
  31
  32     void add(int32_t gapStart, int64_t gapLength) {
  33         int32_t i=length;
  34         while(i>0 && gapLength>gapLengths[i-1]) {
  35             --i;
  36         }
  37         if(i<maxLength) {
  38             // The new gap is now one of the maxLength largest.
  39             // Insert the new gap, moving up smaller ones of the previous
  40             // length largest.
  41             int32_t j= length<maxLength ? length++ : maxLength-1;
  42             while(j>i) {
  43                 gapStarts[j]=gapStarts[j-1];
  44                 gapLengths[j]=gapLengths[j-1];
  45                 --j;
  46             }
  47             gapStarts[i]=gapStart;
  48             gapLengths[i]=gapLength;
  49         }
  50     }
  51
  52     void truncate(int32_t newLength) {
  53         if(newLength<length) {
  54             length=newLength;
  55         }
  56     }
  57
  58     int32_t count() const { return length; }
  59     int32_t gapStart(int32_t i) const { return gapStarts[i]; }
  60     int64_t gapLength(int32_t i) const { return gapLengths[i]; }
  61
  62     int32_t firstAfter(int32_t value) const {
  63         if(length==0) {
  64             return -1;
  65         }
  66         int32_t minValue=0;
  67         int32_t minIndex=-1;
  68         for(int32_t i=0; i<length; ++i) {
  69             if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
  70                 minValue=gapStarts[i];
  71                 minIndex=i;
  72             }
  73         }
  74         return minIndex;
  75     }
  76
  77 private:
  78     static const int32_t kCapacity=15;
  79
  80     int32_t maxLength;
  81     int32_t length;
  82     int32_t gapStarts[kCapacity];
  83     int64_t gapLengths[kCapacity];
  84 };
  85
  86 }  // namespace
  87
  88 /**
  89  * Does it make sense to write 1..capacity ranges?
  90  * Returns 0 if not, otherwise the number of ranges.
  91  * @param values Sorted array of signed-integer values.
  92  * @param length Number of values.
  93  * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
  94  *                Should be 0x80..0x100, must be 1..0x100.
  95  * @param ranges Output ranges array.
  96  * @param capacity Maximum number of ranges.
  97  * @return Minimum number of ranges (at most capacity) that have the desired density,
  98  *         or 0 if that density cannot be achieved.
  99  */
 100 U_CAPI int32_t U_EXPORT2
 101 uprv_makeDenseRanges(const int32_t values[], int32_t length,
 102                      int32_t density,
 103                      int32_t ranges[][2], int32_t capacity) {
 104     if(length<=2) {
 105         return 0;
 106     }
 107     int32_t minValue=values[0];
 108     int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
 109     // Use int64_t variables for intermediate-value precision and to avoid
 110     // signed-int32_t overflow of maxValue-minValue.
 111     int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
 112     if(length>=(density*maxLength)/0x100) {
 113         // Use one range.
 114         ranges[0][0]=minValue;
 115         ranges[0][1]=maxValue;
 116         return 1;
 117     }
 118     if(length<=4) {
 119         return 0;
 120     }
 121     // See if we can split [minValue, maxValue] into 2..capacity ranges,
 122     // divided by the 1..(capacity-1) largest gaps.
 123     LargestGaps gaps(capacity-1);
 124     int32_t i;
 125     int32_t expectedValue=minValue;
 126     for(i=1; i<length; ++i) {
 127         ++expectedValue;
 128         int32_t actualValue=values[i];
 129         if(expectedValue!=actualValue) {
 130             gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
 131             expectedValue=actualValue;
 132         }
 133     }
 134     // We know gaps.count()>=1 because we have fewer values (length) than
 135     // the length of the [minValue..maxValue] range (maxLength).
 136     // (Otherwise we would have returned with the one range above.)
 137     int32_t num;
 138     for(i=0, num=2;; ++i, ++num) {
 139         if(i>=gaps.count()) {
 140             // The values are too sparse for capacity or fewer ranges
 141             // of the requested density.
 142             return 0;
 143         }
 144         maxLength-=gaps.gapLength(i);
 145         if(length>num*2 && length>=(density*maxLength)/0x100) {
 146             break;
 147         }
 148     }
 149     // Use the num ranges with the num-1 largest gaps.
 150     gaps.truncate(num-1);
 151     ranges[0][0]=minValue;
 152     for(i=0; i<=num-2; ++i) {
 153         int32_t gapIndex=gaps.firstAfter(minValue);
 154         int32_t gapStart=gaps.gapStart(gapIndex);
 155         ranges[i][1]=gapStart-1;
 156         ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
 157     }
 158     ranges[num-1][1]=maxValue;
 159     return num;
 160 }