]> git.saurik.com Git - apple/shell_cmds.git/blob - window/compress.c
shell_cmds-17.1.tar.gz
[apple/shell_cmds.git] / window / compress.c
1 /* $NetBSD: compress.c,v 1.4 1997/11/21 08:35:56 lukem Exp $ */
2
3 /*
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Edward Wang at The University of California, Berkeley.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)compress.c 8.1 (Berkeley) 6/6/93";
43 #else
44 __RCSID("$NetBSD: compress.c,v 1.4 1997/11/21 08:35:56 lukem Exp $");
45 #endif
46 #endif /* not lint */
47
48 #include <fcntl.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include "defs.h"
53 #include "tt.h"
54
55 /* special */
56 int cc_trace = 0;
57 FILE *cc_trace_fp;
58
59 /* tunable parameters */
60
61 int cc_reverse = 1;
62 int cc_sort = 0;
63 int cc_chop = 0;
64
65 int cc_token_max = 8; /* <= TOKEN_MAX */
66 int cc_token_min = 2; /* > tt.tt_put_token_cost */
67 int cc_npass0 = 1;
68 int cc_npass1 = 1;
69
70 int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */
71
72 int cc_ntoken = 8192;
73
74 #define cc_weight XXX
75 #ifndef cc_weight
76 int cc_weight = 0;
77 #endif
78
79 #define TOKEN_MAX 16
80
81 struct cc {
82 char string[TOKEN_MAX];
83 char length;
84 char flag;
85 #ifndef cc_weight
86 short weight;
87 #endif
88 long time; /* time last seen */
89 short bcount; /* count in this buffer */
90 short ccount; /* count in compression */
91 short places; /* places in the buffer */
92 short code; /* token code */
93 struct cc *qforw, *qback;
94 struct cc *hforw, **hback;
95 };
96
97 short cc_thresholds[TOKEN_MAX + 1];
98 #define thresh(length) (cc_thresholds[length])
99 #define threshp(code, count, length) \
100 ((code) >= 0 || (short) (count) >= cc_thresholds[length])
101
102 #ifndef cc_weight
103 short cc_wthresholds[TOKEN_MAX + 1];
104 #define wthresh(length) (cc_wthresholds[length])
105 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
106 #else
107 #define wthreshp(weight, length) (0)
108 #endif
109
110 #ifndef cc_weight
111 short cc_wlimits[TOKEN_MAX + 1];
112 #define wlimit(length) (cc_wlimits[length])
113 #endif
114
115 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
116
117 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
118 #define score_adjust(score, p) \
119 do { \
120 int length = (p)->length; \
121 int ccount = (p)->ccount; \
122 if (threshp((p)->code, ccount, length) || \
123 wthreshp((p)->weight, length)) /* XXX */ \
124 (score) -= length - tt.tt_put_token_cost; \
125 else \
126 (score) += cc_score_adjustments[length][ccount]; \
127 } while (0)
128
129 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
130
131 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
132
133 #define qinsert(p1, p2) \
134 do { \
135 struct cc *forw = (p1)->qforw; \
136 struct cc *back = (p1)->qback; \
137 back->qforw = forw; \
138 forw->qback = back; \
139 forw = (p2)->qforw; \
140 (p1)->qforw = forw; \
141 forw->qback = (p1); \
142 (p2)->qforw = (p1); \
143 (p1)->qback = (p2); \
144 } while (0)
145
146 #define qinsertq(q, p) \
147 ((q)->qforw == (q) ? 0 : \
148 ((q)->qback->qforw = (p)->qforw, \
149 (p)->qforw->qback = (q)->qback, \
150 (q)->qforw->qback = (p), \
151 (p)->qforw = (q)->qforw, \
152 (q)->qforw = (q), \
153 (q)->qback = (q)))
154
155 #define H (14)
156 #define HSIZE (1 << H)
157 #define hash(h, c) (((((h) >> (H - 8)) | (h) << 8) ^ (c)) & (HSIZE - 1))
158
159 char *cc_buffer;
160 struct cc **cc_output; /* the output array */
161 short *cc_places[TOKEN_MAX + 1];
162 short *cc_hashcodes; /* for computing hashcodes */
163 struct cc **cc_htab; /* the hash table */
164 struct cc **cc_tokens; /* holds all the active tokens */
165 struct cc_undo {
166 struct cc **pos;
167 struct cc *val;
168 } *cc_undo;
169
170 long cc_time, cc_time0;
171
172 char *cc_tt_ob, *cc_tt_obe;
173
174 int cc_compress __P((struct cc **, struct cc **, char));
175 void cc_compress_cleanup __P((struct cc **, int));
176 void cc_compress_phase __P((struct cc **, int, struct cc **, int));
177 void cc_compress_phase1 __P((struct cc **, struct cc **, int, int));
178 void cc_output_phase __P((char *, struct cc **, int));
179 int cc_sweep __P((char *, int, struct cc **, int));
180 void cc_sweep0 __P((char *, int, int));
181 int cc_sweep_phase __P((char *, int, struct cc **));
182 void cc_sweep_reverse __P((struct cc **, short *));
183 int cc_token_compare __P((const void *, const void *));
184
185 int
186 ccinit()
187 {
188 int i, j;
189 struct cc *p;
190
191 if (tt.tt_token_max > cc_token_max)
192 tt.tt_token_max = cc_token_max;
193 if (tt.tt_token_min < cc_token_min)
194 tt.tt_token_min = cc_token_min;
195 if (tt.tt_token_min > tt.tt_token_max) {
196 tt.tt_ntoken = 0;
197 return 0;
198 }
199 if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */
200 tt.tt_ntoken = cc_ntoken / 2;
201 #define C(x) (sizeof (x) / sizeof *(x))
202 for (i = 0; i < C(cc_thresholds); i++) {
203 int h = i - tt.tt_put_token_cost;
204 if (h > 0)
205 cc_thresholds[i] =
206 (tt.tt_set_token_cost + 1 + h - 1) / h + 1;
207 else
208 cc_thresholds[i] = 0;
209 }
210 for (i = 0; i < C(cc_score_adjustments); i++) {
211 int t = cc_thresholds[i];
212 for (j = 0; j < C(*cc_score_adjustments); j++) {
213 if (j >= t)
214 cc_score_adjustments[i][j] =
215 - (i - tt.tt_put_token_cost);
216 else if (j < t - 1)
217 cc_score_adjustments[i][j] = 0;
218 else
219 /*
220 * cost now is
221 * length * (ccount + 1) a
222 * cost before was
223 * set-token-cost + length +
224 * ccount * put-token-cost b
225 * the score adjustment is (b - a)
226 */
227 cc_score_adjustments[i][j] =
228 tt.tt_set_token_cost + i +
229 j * tt.tt_put_token_cost -
230 i * (j + 1);
231 if (j >= t)
232 cc_initial_scores[i][j] = 0;
233 else
234 /*
235 * - (set-token-cost +
236 * (length - put-token-cost) -
237 * (length - put-token-cost) * ccount)
238 */
239 cc_initial_scores[i][j] =
240 - (tt.tt_set_token_cost +
241 (i - tt.tt_put_token_cost) -
242 (i - tt.tt_put_token_cost) * j);
243 }
244 }
245 #ifndef cc_weight
246 for (i = 1; i < C(cc_wthresholds); i++) {
247 cc_wthresholds[i] =
248 ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
249 i / 5 + 1) *
250 cc_weight + 1;
251 cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
252 }
253 #endif
254 #undef C
255 if ((cc_output = (struct cc **)
256 malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
257 goto nomem;
258 if ((cc_hashcodes = (short *)
259 malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
260 goto nomem;
261 if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
262 goto nomem;
263 if ((cc_tokens = (struct cc **)
264 malloc((unsigned)
265 (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
266 sizeof *cc_tokens)) == 0)
267 goto nomem;
268 if ((cc_undo = (struct cc_undo *)
269 malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
270 goto nomem;
271 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
272 if ((cc_places[i] = (short *)
273 malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
274 goto nomem;
275 cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
276 cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
277 cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
278 cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
279 if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
280 goto nomem;
281 for (i = 0; i < tt.tt_ntoken; i++) {
282 p->code = i;
283 p->time = -1;
284 p->qback = cc_q0a.qback;
285 p->qforw = &cc_q0a;
286 p->qback->qforw = p;
287 cc_q0a.qback = p;
288 p++;
289 }
290 for (; i < cc_ntoken; i++) {
291 p->code = -1;
292 p->time = -1;
293 p->qback = cc_q1a.qback;
294 p->qforw = &cc_q1a;
295 p->qback->qforw = p;
296 cc_q1a.qback = p;
297 p++;
298 }
299 cc_tt_ob = tt_ob;
300 cc_tt_obe = tt_obe;
301 if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
302 goto nomem;
303 return 0;
304 nomem:
305 wwerrno = WWE_NOMEM;
306 return -1;
307 }
308
309 void
310 ccstart()
311 {
312 ttflush();
313 tt_obp = tt_ob = cc_buffer;
314 tt_obe = tt_ob + cc_bufsize;
315 tt.tt_flush = ccflush;
316 if (cc_trace) {
317 cc_trace_fp = fopen("window-trace", "a");
318 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
319 }
320 ccreset();
321 }
322
323 void
324 ccreset()
325 {
326 struct cc *p;
327
328 memset((char *) cc_htab, 0, HSIZE * sizeof *cc_htab);
329 for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
330 p->hback = 0;
331 for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
332 p->hback = 0;
333 }
334
335 void
336 ccend()
337 {
338
339 ttflush();
340 tt_obp = tt_ob = cc_tt_ob;
341 tt_obe = cc_tt_obe;
342 tt.tt_flush = 0;
343 if (cc_trace_fp != NULL) {
344 (void) fclose(cc_trace_fp);
345 cc_trace_fp = NULL;
346 }
347 }
348
349 void
350 ccflush()
351 {
352 int bufsize = tt_obp - tt_ob;
353 int n;
354
355 if (tt_ob != cc_buffer)
356 abort();
357 if (cc_trace_fp != NULL) {
358 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
359 (void) putc(-1, cc_trace_fp);
360 }
361 tt.tt_flush = 0;
362 (*tt.tt_compress)(1);
363 if (bufsize < tt.tt_token_min) {
364 ttflush();
365 goto out;
366 }
367 tt_obp = tt_ob = cc_tt_ob;
368 tt_obe = cc_tt_obe;
369 cc_time0 = cc_time;
370 cc_time += bufsize;
371 n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
372 cc_compress_phase(cc_output, bufsize, cc_tokens, n);
373 cc_output_phase(cc_buffer, cc_output, bufsize);
374 ttflush();
375 tt_obp = tt_ob = cc_buffer;
376 tt_obe = cc_buffer + cc_bufsize;
377 out:
378 (*tt.tt_compress)(0);
379 tt.tt_flush = ccflush;
380 }
381
382 int
383 cc_sweep_phase(buffer, bufsize, tokens)
384 char *buffer;
385 int bufsize;
386 struct cc **tokens;
387 {
388 struct cc **pp = tokens;
389 int i, n;
390 #ifdef STATS
391 int nn, ii;
392 #endif
393
394 #ifdef STATS
395 if (verbose >= 0)
396 time_begin();
397 if (verbose > 0)
398 printf("Sweep:");
399 #endif
400 cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
401 #ifdef STATS
402 ntoken_stat = 0;
403 nn = 0;
404 ii = 0;
405 #endif
406 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
407 #ifdef STATS
408 if (verbose > 0) {
409 if (ii > 7) {
410 printf("\n ");
411 ii = 0;
412 }
413 ii++;
414 printf(" (%d", i);
415 (void) fflush(stdout);
416 }
417 #endif
418 n = cc_sweep(buffer, bufsize, pp, i);
419 pp += n;
420 #ifdef STATS
421 if (verbose > 0) {
422 if (--n > 0) {
423 printf(" %d", n);
424 nn += n;
425 }
426 putchar(')');
427 }
428 #endif
429 }
430 qinsertq(&cc_q1b, &cc_q1a);
431 #ifdef STATS
432 if (verbose > 0)
433 printf("\n %d tokens, %d candidates\n",
434 ntoken_stat, nn);
435 if (verbose >= 0)
436 time_end();
437 #endif
438 return pp - tokens;
439 }
440
441 void
442 cc_sweep0(buffer, n, length)
443 char *buffer;
444 int n, length;
445 {
446 char *p;
447 short *hc;
448 int i;
449 short c;
450 short pc = tt.tt_padc;
451
452 /* n and length are at least 1 */
453 p = buffer++;
454 hc = cc_hashcodes;
455 i = n;
456 do {
457 if ((*hc++ = *p++) == pc)
458 hc[-1] = -1;
459 } while (--i);
460 while (--length) {
461 p = buffer++;
462 hc = cc_hashcodes;
463 for (i = n--; --i;) {
464 if ((c = *p++) == pc || *hc < 0)
465 c = -1;
466 else
467 c = hash(*hc, c);
468 *hc++ = c;
469 }
470 }
471 }
472
473 int
474 cc_sweep(buffer, bufsize, tokens, length)
475 char *buffer;
476 int bufsize;
477 struct cc **tokens;
478 int length;
479 {
480 struct cc *p;
481 char *cp;
482 int i;
483 short *hc;
484 short *places = cc_places[length];
485 struct cc **pp = tokens;
486 short threshold = thresh(length);
487 #ifndef cc_weight
488 short wthreshold = wthresh(length);
489 short limit = wlimit(length);
490 #endif
491 int time;
492 short pc = tt.tt_padc;
493
494 i = length - 1;
495 bufsize -= i;
496 cp = buffer + i;
497 hc = cc_hashcodes;
498 time = cc_time0;
499 for (i = 0; i < bufsize; i++, time++) {
500 struct cc **h;
501
502 {
503 short *hc1 = hc;
504 short c = *cp++;
505 short hh;
506 if ((hh = *hc1) < 0 || c == pc) {
507 *hc1++ = -1;
508 hc = hc1;
509 continue;
510 }
511 h = cc_htab + (*hc1++ = hash(hh, c));
512 hc = hc1;
513 }
514 for (p = *h; p != 0; p = p->hforw)
515 if (p->length == (char) length) {
516 char *p1 = p->string;
517 char *p2 = cp - length;
518 int n = length;
519 do
520 if (*p1++ != *p2++)
521 goto fail;
522 while (--n);
523 break;
524 fail:;
525 }
526 if (p == 0) {
527 p = cc_q1a.qback;
528 if (p == &cc_q1a ||
529 (p->time >= cc_time0 && p->length == (char) length))
530 continue;
531 if (p->hback != 0)
532 if ((*p->hback = p->hforw) != 0)
533 p->hforw->hback = p->hback;
534 {
535 char *p1 = p->string;
536 char *p2 = cp - length;
537 int n = length;
538 do
539 *p1++ = *p2++;
540 while (--n);
541 }
542 p->length = length;
543 #ifndef cc_weight
544 p->weight = cc_weight;
545 #endif
546 p->time = time;
547 p->bcount = 1;
548 p->ccount = 0;
549 p->flag = 0;
550 if ((p->hforw = *h) != 0)
551 p->hforw->hback = &p->hforw;
552 *h = p;
553 p->hback = h;
554 qinsert(p, &cc_q1a);
555 places[i] = -1;
556 p->places = i;
557 #ifdef STATS
558 ntoken_stat++;
559 #endif
560 } else if (p->time < cc_time0) {
561 #ifndef cc_weight
562 if ((p->weight += p->time - time) < 0)
563 p->weight = cc_weight;
564 else if ((p->weight += cc_weight) > limit)
565 p->weight = limit;
566 #endif
567 p->time = time;
568 p->bcount = 1;
569 p->ccount = 0;
570 if (p->code >= 0) {
571 p->flag = 1;
572 *pp++ = p;
573 } else
574 #ifndef cc_weight
575 if (p->weight >= wthreshold) {
576 p->flag = 1;
577 *pp++ = p;
578 qinsert(p, &cc_q1b);
579 } else
580 #endif
581 {
582 p->flag = 0;
583 qinsert(p, &cc_q1a);
584 }
585 places[i] = -1;
586 p->places = i;
587 #ifdef STATS
588 ntoken_stat++;
589 #endif
590 } else if (p->time + length > time) {
591 /*
592 * overlapping token, don't count as two and
593 * don't update time, but do adjust weight to offset
594 * the difference
595 */
596 #ifndef cc_weight
597 if (cc_weight != 0) { /* XXX */
598 p->weight += time - p->time;
599 if (!p->flag && p->weight >= wthreshold) {
600 p->flag = 1;
601 *pp++ = p;
602 qinsert(p, &cc_q1b);
603 }
604 }
605 #endif
606 places[i] = p->places;
607 p->places = i;
608 } else {
609 #ifndef cc_weight
610 if ((p->weight += p->time - time) < 0)
611 p->weight = cc_weight;
612 else if ((p->weight += cc_weight) > limit)
613 p->weight = limit;
614 #endif
615 p->time = time;
616 p->bcount++;
617 if (!p->flag &&
618 /* code must be < 0 if flag false here */
619 (p->bcount >= threshold
620 #ifndef cc_weight
621 || p->weight >= wthreshold
622 #endif
623 )) {
624 p->flag = 1;
625 *pp++ = p;
626 qinsert(p, &cc_q1b);
627 }
628 places[i] = p->places;
629 p->places = i;
630 }
631 }
632 if ((i = pp - tokens) > 0) {
633 *pp = 0;
634 if (cc_reverse)
635 cc_sweep_reverse(tokens, places);
636 if (cc_sort && i > 1) {
637 qsort((char *) tokens, i, sizeof *tokens,
638 cc_token_compare);
639 }
640 if (cc_chop) {
641 if ((i = i * cc_chop / 100) == 0)
642 i = 1;
643 tokens[i] = 0;
644 }
645 i++;
646 }
647 return i;
648 }
649
650 void
651 cc_sweep_reverse(pp, places)
652 struct cc **pp;
653 short *places;
654 {
655 struct cc *p;
656 short front, back, t;
657
658 while ((p = *pp++) != 0) {
659 back = -1;
660 t = p->places;
661 /* the list is never empty */
662 do {
663 front = places[t];
664 places[t] = back;
665 back = t;
666 } while ((t = front) >= 0);
667 p->places = back;
668 }
669 }
670
671 void
672 cc_compress_phase(output, bufsize, tokens, ntoken)
673 struct cc **output;
674 int bufsize;
675 struct cc **tokens;
676 int ntoken;
677 {
678 int i;
679
680 memset((char *) output, 0, bufsize * sizeof *output);
681 for (i = 0; i < cc_npass0; i++)
682 cc_compress_phase1(output, tokens, ntoken, 0);
683 for (i = 0; i < cc_npass1; i++)
684 cc_compress_phase1(output, tokens, ntoken, 1);
685 cc_compress_cleanup(output, bufsize);
686 }
687
688 void
689 cc_compress_phase1(output, tokens, ntoken, flag)
690 struct cc **output;
691 struct cc **tokens;
692 int ntoken, flag;
693 {
694 struct cc **pp;
695 #ifdef STATS
696 int i = 0;
697 int nt = 0, cc = 0, nc = 0;
698 #endif
699
700 #ifdef STATS
701 if (verbose >= 0)
702 time_begin();
703 if (verbose > 0)
704 printf("Compress:");
705 #endif
706 pp = tokens;
707 while (pp < tokens + ntoken) {
708 #ifdef STATS
709 if (verbose > 0) {
710 ntoken_stat = 0;
711 ccount_stat = 0;
712 ncover_stat = 0;
713 if (i > 2) {
714 printf("\n ");
715 i = 0;
716 }
717 i++;
718 printf(" (%d", (*pp)->length);
719 (void) fflush(stdout);
720 }
721 #endif
722 pp += cc_compress(output, pp, flag);
723 #ifdef STATS
724 if (verbose > 0) {
725 printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
726 ncover_stat);
727 nt += ntoken_stat;
728 cc += ccount_stat;
729 nc += ncover_stat;
730 }
731 #endif
732 }
733 #ifdef STATS
734 if (verbose > 0)
735 printf("\n total: (%dt %du %dc)\n", nt, cc, nc);
736 if (verbose >= 0)
737 time_end();
738 #endif
739 }
740
741 void
742 cc_compress_cleanup(output, bufsize)
743 struct cc **output;
744 int bufsize;
745 {
746 struct cc **end;
747
748 /* the previous output phase may have been interrupted */
749 qinsertq(&cc_q0b, &cc_q0a);
750 for (end = output + bufsize; output < end;) {
751 struct cc *p;
752 int length;
753 if ((p = *output) == 0) {
754 output++;
755 continue;
756 }
757 length = p->length;
758 if (!p->flag) {
759 } else if (p->code >= 0) {
760 qinsert(p, &cc_q0b);
761 p->flag = 0;
762 } else if (p->ccount == 0) {
763 *output = 0;
764 } else if (p->ccount >= thresh(length)
765 #ifndef cc_weight
766 || wthreshp(p->weight, length)
767 #endif
768 ) {
769 p->flag = 0;
770 } else {
771 p->ccount = 0;
772 *output = 0;
773 }
774 output += length;
775 }
776 }
777
778 int
779 cc_compress(output, tokens, flag)
780 struct cc **output;
781 struct cc **tokens;
782 char flag;
783 {
784 struct cc **pp = tokens;
785 struct cc *p = *pp++;
786 int length = p->length;
787 int threshold = thresh(length);
788 #ifndef cc_weight
789 short wthreshold = wthresh(length);
790 #endif
791 short *places = cc_places[length];
792 int *initial_scores = cc_initial_scores[length];
793 int initial_score0 = put_token_score(length);
794
795 do {
796 int score;
797 struct cc_undo *undop;
798 int ccount;
799 #ifdef STATS
800 int ncover;
801 #endif
802 int i;
803
804 ccount = p->ccount;
805 if ((short) ccount >= p->bcount)
806 continue;
807 if (p->code >= 0 || ccount >= threshold)
808 score = 0;
809 #ifndef cc_weight
810 else if (p->weight >= wthreshold)
811 /* allow one fewer match than normal */
812 /* XXX, should adjust for ccount */
813 score = - tt.tt_set_token_cost;
814 #endif
815 else
816 score = initial_scores[ccount];
817 undop = cc_undo;
818 #ifdef STATS
819 ncover = 0;
820 #endif
821 for (i = p->places; i >= 0; i = places[i]) {
822 struct cc **jp;
823 struct cc *x;
824 struct cc **ip = output + i;
825 int score0 = initial_score0;
826 struct cc **iip = ip + length;
827 struct cc_undo *undop1 = undop;
828
829 if ((x = *(jp = ip)) != 0)
830 goto z;
831 while (--jp >= output)
832 if ((x = *jp) != 0) {
833 if (jp + x->length > ip)
834 goto z;
835 break;
836 }
837 jp = ip + 1;
838 while (jp < iip) {
839 if ((x = *jp) == 0) {
840 jp++;
841 continue;
842 }
843 z:
844 if (x == p)
845 goto undo;
846 #ifdef STATS
847 ncover++;
848 #endif
849 undop->pos = jp;
850 undop->val = x;
851 undop++;
852 *jp = 0;
853 x->ccount--;
854 score_adjust(score0, x);
855 if (score0 < 0 && flag)
856 goto undo;
857 jp += x->length;
858 }
859 undop->pos = ip;
860 undop->val = 0;
861 undop++;
862 *ip = p;
863 ccount++;
864 score += score0;
865 continue;
866 undo:
867 while (--undop >= undop1)
868 if ((*undop->pos = x = undop->val))
869 x->ccount++;
870 undop++;
871 }
872 if (score > 0) {
873 #ifdef STATS
874 ccount_stat += ccount - p->ccount;
875 ntoken_stat++;
876 ncover_stat += ncover;
877 #endif
878 p->ccount = ccount;
879 } else {
880 struct cc_undo *u = cc_undo;
881 while (--undop >= u) {
882 struct cc *x;
883 if ((*undop->pos = x = undop->val))
884 x->ccount++;
885 }
886 }
887 } while ((p = *pp++) != 0);
888 return pp - tokens;
889 }
890
891 void
892 cc_output_phase(buffer, output, bufsize)
893 char *buffer;
894 struct cc **output;
895 int bufsize;
896 {
897 int i;
898 struct cc *p, *p1;
899
900 for (i = 0; i < bufsize;) {
901 if ((p = output[i]) == 0) {
902 ttputc(buffer[i]);
903 i++;
904 } else if (p->code >= 0) {
905 if (--p->ccount == 0)
906 qinsert(p, &cc_q0a);
907 (*tt.tt_put_token)(p->code, p->string, p->length);
908 wwntokuse++;
909 wwntoksave += put_token_score(p->length);
910 i += p->length;
911 } else if ((p1 = cc_q0a.qback) != &cc_q0a) {
912 p->code = p1->code;
913 p1->code = -1;
914 qinsert(p1, &cc_q1a);
915 if (--p->ccount == 0)
916 qinsert(p, &cc_q0a);
917 else
918 qinsert(p, &cc_q0b);
919 (*tt.tt_set_token)(p->code, p->string, p->length);
920 wwntokdef++;
921 wwntoksave -= tt.tt_set_token_cost;
922 i += p->length;
923 } else {
924 p->ccount--;
925 ttwrite(p->string, p->length);
926 wwntokbad++;
927 i += p->length;
928 }
929 }
930 wwntokc += bufsize;
931 }
932
933 int
934 cc_token_compare(p1, p2)
935 const void *p1, *p2;
936 {
937 const struct cc **vp1 = (void *)p1;
938 const struct cc **vp2 = (void *)p2;
939 return (*vp2)->bcount - (*vp1)->bcount;
940 }