[apple/icu.git] / icuSources / tools / toolutil / denseranges.cpp

/*
*******************************************************************************
*   Copyright (C) 2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  denseranges.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*
* Helper code for finding a small number of dense ranges.
*/

#include "unicode/utypes.h"
#include "denseranges.h"

// Definitions in the anonymous namespace are invisible outside this file.
namespace {

/**
 * Collects the largest range gaps seen so far, at most 15 of them,
 * ordered by descending gap length: index 0 holds the largest gap.
 * Among gaps of equal length, the one that was added earlier comes first.
 */
class LargestGaps {
public:
    /**
     * @param max Maximum number of gaps to keep.
     *            Clamped to 0..kCapacity so that the fixed-size arrays
     *            are never indexed out of bounds.
     */
    explicit LargestGaps(int32_t max) : maxLength(max), length(0) {
        if(maxLength>kCapacity) {
            maxLength=kCapacity;
        } else if(maxLength<0) {
            maxLength=0;
        }
    }

    /**
     * Offers a gap to the collection. It is stored only if it is larger than
     * one of the currently stored gaps, or if there is still room.
     * @param gapStart First value that is missing at the start of the gap.
     * @param gapLength Number of missing values.
     */
    void add(int32_t gapStart, int64_t gapLength) {
        // Find the insertion index: skip backwards over all strictly smaller gaps.
        int32_t i=length;
        while(i>0 && gapLength>gapLengths[i-1]) {
            --i;
        }
        if(i<maxLength) {
            // The new gap is now one of the maxLength largest.
            // Insert the new gap, moving up the smaller ones.
            // If the collection is already full, then the smallest gap
            // (at index maxLength-1) is overwritten and thereby dropped.
            int32_t j= length<maxLength ? length++ : maxLength-1;
            while(j>i) {
                gapStarts[j]=gapStarts[j-1];
                gapLengths[j]=gapLengths[j-1];
                --j;
            }
            gapStarts[i]=gapStart;
            gapLengths[i]=gapLength;
        }
    }

    /**
     * Keeps only the newLength largest gaps.
     * Does nothing if fewer gaps are stored.
     * A negative newLength is treated as 0, so that count() never becomes
     * negative (which would make add() write before the start of the arrays).
     */
    void truncate(int32_t newLength) {
        if(newLength<0) {
            newLength=0;
        }
        if(newLength<length) {
            length=newLength;
        }
    }

    /** @return Number of gaps currently stored. */
    int32_t count() const { return length; }
    /** @return Start of the i-th largest gap; i is not range-checked, must be 0..count()-1. */
    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
    /** @return Length of the i-th largest gap; i is not range-checked, must be 0..count()-1. */
    int64_t gapLength(int32_t i) const { return gapLengths[i]; }

    /**
     * Finds the stored gap with the smallest start that is greater than value.
     * @return The index of that gap, or -1 if no stored gap starts after value
     *         (which includes the case of an empty collection).
     */
    int32_t firstAfter(int32_t value) const {
        int32_t minValue=0;
        int32_t minIndex=-1;
        for(int32_t i=0; i<length; ++i) {
            if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
                minValue=gapStarts[i];
                minIndex=i;
            }
        }
        return minIndex;
    }

private:
    static const int32_t kCapacity=15;

    int32_t maxLength;  // Maximum number of gaps to keep, 0..kCapacity.
    int32_t length;     // Number of gaps currently stored, 0..maxLength.
    int32_t gapStarts[kCapacity];
    int64_t gapLengths[kCapacity];
};

}  // namespace

/**
 * Does it make sense to write 1..capacity ranges?
 * Returns 0 if not, otherwise the number of ranges.
 *
 * The candidate ranges are formed by splitting [values[0], values[length-1]]
 * at the largest gaps between adjacent values. At most 15 gaps are tracked,
 * so at most 16 ranges are ever returned, even if capacity is larger.
 *
 * @param values Sorted array of signed-integer values.
 *               The splitting logic expects strictly ascending values.
 * @param length Number of values. Fewer than 3 values never yield ranges;
 *               more than one range requires at least 5 values and
 *               more than two values per range on average.
 * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
 *                Should be 0x80..0x100, must be 1..0x100.
 * @param ranges Output ranges array; each entry is an inclusive [start, limit] pair.
 *               Its contents are unspecified when 0 is returned.
 * @param capacity Maximum number of ranges. If less than 1, nothing is written
 *                 and 0 is returned.
 * @return Minimum number of ranges (at most capacity) that have the desired density,
 *         or 0 if that density cannot be achieved.
 */
U_CAPI int32_t U_EXPORT2
uprv_makeDenseRanges(const int32_t values[], int32_t length,
                     int32_t density,
                     int32_t ranges[][2], int32_t capacity) {
    // Without room for even one range we must not write to ranges[0].
    if(length<=2 || capacity<1) {
        return 0;
    }
    int32_t minValue=values[0];
    int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
    // Use int64_t variables for intermediate-value precision and to avoid
    // signed-int32_t overflow of maxValue-minValue.
    int64_t maxLength=static_cast<int64_t>(maxValue)-static_cast<int64_t>(minValue)+1;
    if(length>=(density*maxLength)/0x100) {
        // Use one range.
        ranges[0][0]=minValue;
        ranges[0][1]=maxValue;
        return 1;
    }
    if(length<=4) {
        return 0;
    }
    // See if we can split [minValue, maxValue] into 2..capacity ranges,
    // divided by the 1..(capacity-1) largest gaps.
    LargestGaps gaps(capacity-1);
    int32_t expectedValue=minValue;
    for(int32_t v=1; v<length; ++v) {
        ++expectedValue;
        int32_t actualValue=values[v];
        if(expectedValue!=actualValue) {
            // Values expectedValue..actualValue-1 are missing.
            gaps.add(expectedValue,
                     static_cast<int64_t>(actualValue)-static_cast<int64_t>(expectedValue));
            expectedValue=actualValue;
        }
    }
    // If capacity>=2 then gaps.count()>=1 because we have fewer values (length)
    // than the length of the [minValue..maxValue] range (maxLength).
    // (Otherwise we would have returned with the one range above.)
    // If capacity==1 then no gaps were collected and the loop returns 0 right away.
    //
    // Remove the largest gaps one by one from the covered length
    // until the remaining length is dense enough.
    int32_t num=2;
    for(int32_t g=0;; ++g, ++num) {
        if(g>=gaps.count()) {
            // The values are too sparse for capacity or fewer ranges
            // of the requested density.
            return 0;
        }
        maxLength-=gaps.gapLength(g);
        if(length>num*2 && length>=(density*maxLength)/0x100) {
            break;
        }
    }
    // Use the num ranges with the num-1 largest gaps.
    // The gaps are stored by size; walk them in ascending order of their
    // start values to emit the ranges from left to right.
    gaps.truncate(num-1);
    ranges[0][0]=minValue;
    for(int32_t r=0; r<num-1; ++r) {
        int32_t gapIndex=gaps.firstAfter(minValue);
        if(gapIndex<0) {
            // Cannot happen for strictly ascending values.
            // Give up rather than index the gaps with -1.
            return 0;
        }
        int32_t gapStart=gaps.gapStart(gapIndex);
        ranges[r][1]=gapStart-1;
        // The next range starts with the first value after the gap.
        minValue=static_cast<int32_t>(gapStart+gaps.gapLength(gapIndex));
        ranges[r+1][0]=minValue;
    }
    ranges[num-1][1]=maxValue;
    return num;
}
Commit	Line	Data
4388f060 A	1	/*
	2	*******************************************************************************
	3	* Copyright (C) 2010, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	*******************************************************************************
	6	* file name: denseranges.cpp
	7	* encoding: US-ASCII
	8	* tab size: 8 (not used)
	9	* indentation:4
	10	*
	11	* created on: 2010sep25
	12	* created by: Markus W. Scherer
	13	*
	14	* Helper code for finding a small number of dense ranges.
	15	*/
	16
	17	#include "unicode/utypes.h"
	18	#include "denseranges.h"
	19
	20	// Definitions in the anonymous namespace are invisible outside this file.
	21	namespace {
	22
	23	/**
	24	* Collect up to 15 range gaps and sort them by ascending gap size.
	25	*/
	26	class LargestGaps {
	27	public:
	28	LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {}
	29
	30	void add(int32_t gapStart, int64_t gapLength) {
	31	int32_t i=length;
	32	while(i>0 && gapLength>gapLengths[i-1]) {
	33	--i;
	34	}
	35	if(i<maxLength) {
	36	// The new gap is now one of the maxLength largest.
	37	// Insert the new gap, moving up smaller ones of the previous
	38	// length largest.
	39	int32_t j= length<maxLength ? length++ : maxLength-1;
	40	while(j>i) {
	41	gapStarts[j]=gapStarts[j-1];
	42	gapLengths[j]=gapLengths[j-1];
	43	--j;
	44	}
	45	gapStarts[i]=gapStart;
	46	gapLengths[i]=gapLength;
	47	}
	48	}
	49
	50	void truncate(int32_t newLength) {
	51	if(newLength<length) {
	52	length=newLength;
	53	}
	54	}
	55
	56	int32_t count() const { return length; }
	57	int32_t gapStart(int32_t i) const { return gapStarts[i]; }
	58	int64_t gapLength(int32_t i) const { return gapLengths[i]; }
	59
	60	int32_t firstAfter(int32_t value) const {
	61	if(length==0) {
	62	return -1;
	63	}
	64	int32_t minValue=0;
65	int32_t minIndex=-1;
66	for(int32_t i=0; i<length; ++i) {
67	if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
68	minValue=gapStarts[i];
69	minIndex=i;
70	}
71	}
72	return minIndex;
73	}
74
75	private:
76	static const int32_t kCapacity=15;
77
78	int32_t maxLength;
79	int32_t length;
80	int32_t gapStarts[kCapacity];
81	int64_t gapLengths[kCapacity];
82	};
83
84	} // namespace
85
86	/**
87	* Does it make sense to write 1..capacity ranges?
88	* Returns 0 if not, otherwise the number of ranges.
89	* @param values Sorted array of signed-integer values.
90	* @param length Number of values.
91	* @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
92	* Should be 0x80..0x100, must be 1..0x100.
93	* @param ranges Output ranges array.
94	* @param capacity Maximum number of ranges.
95	* @return Minimum number of ranges (at most capacity) that have the desired density,
96	* or 0 if that density cannot be achieved.
97	*/
98	U_CAPI int32_t U_EXPORT2
99	uprv_makeDenseRanges(const int32_t values[], int32_t length,
100	int32_t density,
101	int32_t ranges[][2], int32_t capacity) {
102	if(length<=2) {
103	return 0;
104	}
105	int32_t minValue=values[0];
106	int32_t maxValue=values[length-1]; // Assume minValue<=maxValue.
107	// Use int64_t variables for intermediate-value precision and to avoid
108	// signed-int32_t overflow of maxValue-minValue.
109	int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
110	if(length>=(density*maxLength)/0x100) {
111	// Use one range.
112	ranges[0][0]=minValue;
113	ranges[0][1]=maxValue;
114	return 1;
115	}
116	if(length<=4) {
117	return 0;
118	}
119	// See if we can split [minValue, maxValue] into 2..capacity ranges,
120	// divided by the 1..(capacity-1) largest gaps.
121	LargestGaps gaps(capacity-1);
122	int32_t i;
123	int32_t expectedValue=minValue;
124	for(i=1; i<length; ++i) {
125	++expectedValue;
126	int32_t actualValue=values[i];
127	if(expectedValue!=actualValue) {
128	gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
129	expectedValue=actualValue;
130	}
131	}
132	// We know gaps.count()>=1 because we have fewer values (length) than
133	// the length of the [minValue..maxValue] range (maxLength).
134	// (Otherwise we would have returned with the one range above.)
135	int32_t num;
136	for(i=0, num=2;; ++i, ++num) {
137	if(i>=gaps.count()) {
138	// The values are too sparse for capacity or fewer ranges
139	// of the requested density.
140	return 0;
141	}
142	maxLength-=gaps.gapLength(i);
	143	if(length>num*2 && length>=(density*maxLength)/0x100) {
144	break;
145	}
146	}
147	// Use the num ranges with the num-1 largest gaps.
148	gaps.truncate(num-1);
149	ranges[0][0]=minValue;
150	for(i=0; i<=num-2; ++i) {
151	int32_t gapIndex=gaps.firstAfter(minValue);
152	int32_t gapStart=gaps.gapStart(gapIndex);
153	ranges[i][1]=gapStart-1;
154	ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));
155	}
156	ranges[num-1][1]=maxValue;
157	return num;
158	}