2 * Copyright (c) 1996-1998, Index Data.
3 * See the file LICENSE for details.
6 * $Id: merge-d.c,v 1.18 1999-08-25 18:09:24 heikki Exp $
11 * - Input filter: Eliminate del-ins pairs, tell if only one entry (or none)
12 * - single-entry optimizing (keep the one entry in the dict, no block)
13 * - study and optimize block sizes (later)
14 * - Clean up the different ways diffs are handled in writing and reading
15 * - Keep a merge-count in the firstpp, and if the block has already been
16 * merged, reduce it to a larger size even if it could fit in a small one!
17 * - Keep minimum freespace in the category table, and use that in reduce!
18 * - pass a space-needed for separateDiffBlock and reduce to be able to
19 * reserve more room for diffs, or to force a separate (larger?) block
20 * - Idea: Simplify the structure, so that the first block is always diffs.
21 * On small blocks, that is all we have. Once a block has been merged, we
22 * allocate the first main block and a (new) firstblock ffor diffs. From
23 * that point on the word has two blocks for it.
24 * - On allocating more blocks (in append), check the order of blocks, and
25 * if needed, swap them.
26 * - In merge, merge also with the input data.
27 * - Write a routine to save/load indexes into a block, save only as many
28 * bytes as needed (size, diff, diffindexes)
33 * There is a confusion about the block addresses. cat or type is the category,
34 * pos or block is the block number. pp structures keep these two separate,
35 * and combine when saving the pp. The next pointer in the pp structure is
36 * also a combined address, but needs to be combined every time it is needed,
37 * and separated when the partss are needed... This is done with the isamd_
38 * _block, _type, and _addr macros. The _addr takes block and type as args,
39 * in that order. This conflicts with the order these are often mentioned in
40 * the debug log calls, and other places, leading to small mistakes here
43 * Needs cleaning! The way diff blocks are handled in append and reading is
44 * quite different, and likely to give maintenance problems.
46 * log levels (set isamddebug=x in zebra.cfg (or what ever cfg file you use) )
47 * 0 = no logging. Default
48 * 1 = no logging here. isamd logs overall statistics
49 * 2 = Each call to isamd_append with start address and no more
50 * 3 = Start and type of append, start of merge, and result of append
51 * 4 = Block allocations
52 * 5 = Block-level operations (read/write)
53 * 6 = Details about diff blocks etc.
54 * 7 = Log each record as it passes the system (once)
55 * 8 = Log raw and (de)coded data
56 * 9 = Anything else that may be useful
57 * .. = Anything needed to hunt a specific bug
58 * (note that all tests in the code are like debug>3, which means 4 or above!)
60 * Design for the new and improved isamd
62 * - The first block is only diffs, no straight data
63 * - Additional blocks are straight data
64 * - When a diff block gets filled up, a data block is created by
65 * merging the diffs with the data
68 * - Isamd_pp: buffer for diffs and for data
69 * keep both pos, type, and combined address
70 * routine to set the address
71 * - diffbuf: lengths as short ints, or bytes for small blocks
72 * - keys are of key_struct, not just a number of bytes.
76 * - create_new_block if needed
79 * - get diffend, start encoding
82 * - if no room, then realloc block in larger size
83 * - if still no room, merge and exit
84 * - append in the block
87 * - just as before, except that merges also input data directly
88 * - writes into new data blocks
91 * - isamd.c: load firstpp, load datablock
92 * save firstpp, save datablock
93 * - Readlength, writelength - handling right size of len fields
94 * - isamd_read_main_item: take also a merge input structure, and merge it too
95 * - prefilter: cache two inputs, and check if they cancel.
96 * - single-item optimization
98 * questions: Should we realloc firstblocks in a different size as the main
99 * blocks. Makes a sideways seek, which is bound to be slowe. But saves some
100 * update time. Compromise: alloc the first one in the size of the datablock,
101 * but increase if necessary. Large blocks get a large diff, ok. Small ones
102 * may get an extra seek in read, but save merges.
106 #define NEW_ISAM_D 0 /* not yet ready to delete the old one! */
113 #include "../index/index.h"
117 struct ISAMD_DIFF_s {
127 static char *hexdump(unsigned char *p, int len, char *buff) {
128 static char localbuff[128];
130 if (!buff) buff=localbuff;
133 sprintf(bytebuff,"%02x",*p);
135 strcat(buff,bytebuff);
136 if (len) strcat(buff,",");
142 /* The next many lines are the old ISAM_D. Works, but not optimal */
144 static int separateDiffBlock(ISAMD_PP pp)
146 int limit = sizeof(int) + 8;
148 return 1; /* multi-block chains always have a separate diff block */
149 return ( pp->size + limit >= pp->is->method->filecat[pp->cat].bsize);
150 /* make sure there is at least room for the length and one diff. if not, */
151 /* it goes to a separate block. Assumes max diff is 8 bytes. Not */
152 /* unreaalistic in large data sets, where first sysno may be very large, */
153 /* and even the first seqno may be quite something. */
155 /* todo: Make the limit adjustable in the filecat table ! */
159 /**************************************************************
161 **************************************************************/
163 static void getDiffInfo(ISAMD_PP pp, int diffidx)
164 { /* builds the diff info structures from a diffblock */
165 int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +2;
166 /* Each diff takes at least 5 bytes. Probably more, but this is safe */
167 int i=1; /* [0] is used for the main data */
168 int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s);
170 pp->diffinfo = xmalloc( diffsz );
171 memset(pp->diffinfo,'\0',diffsz);
172 if (pp->is->method->debug > 5)
173 logf(LOG_LOG,"isamd_getDiffInfo: %d (%d:%d), ix=%d mx=%d",
174 isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, diffidx,maxinfos);
179 if ( diffidx+sizeof(int) > pp->is->method->filecat[pp->cat].bsize )
181 if (pp->is->method->debug > 5)
182 logf(LOG_LOG,"isamd_getDiffInfo:Near end (no room for len) at ix=%d n=%d",
184 return; /* whole block done */
186 memcpy( &pp->diffinfo[i].maxidx, &pp->diffbuf[diffidx], sizeof(int) );
188 if (pp->is->method->debug > 5)
189 logf(LOG_LOG,"isamd_getDiffInfo: max=%d ix=%d dbuf=%p",
190 pp->diffinfo[i].maxidx, diffidx, pp->diffbuf);
192 if ( (pp->is->method->debug > 0) &&
193 (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) )
194 { /* bug-hunting, this fails on some long runs that log too much */
195 logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d",
196 __FILE__,__LINE__, diffidx);
197 logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx,
198 pp->is->method->filecat[pp->cat].bsize);
199 logf(LOG_LOG,"pp=%d=%d:%d pp->nx=%d=%d:%d",
200 isamd_addr(pp->pos,pp->cat), pp->pos, pp->cat,
201 pp->next, isamd_type(pp->next), isamd_block(pp->next) );
203 assert(pp->diffinfo[i].maxidx <= pp->is->method->filecat[pp->cat].bsize+1);
205 if (0==pp->diffinfo[i].maxidx)
207 if (pp->is->method->debug > 5) //!!! 4
208 logf(LOG_LOG,"isamd_getDiffInfo:End mark at ix=%d n=%d",
210 return; /* end marker */
212 diffidx += sizeof(int);
213 pp->diffinfo[i].decodeData = (*pp->is->method->code_start)(ISAMD_DECODE);
214 pp->diffinfo[i].diffidx = diffidx;
215 if (pp->is->method->debug > 5)
216 logf(LOG_LOG,"isamd_getDiff[%d]:%d-%d %s",
217 i,diffidx-sizeof(int),pp->diffinfo[i].maxidx,
218 hexdump((char *)&pp->diffbuf[diffidx-4],8,0) );
219 diffidx=pp->diffinfo[i].maxidx;
220 if ( diffidx > pp->is->method->filecat[pp->cat].bsize )
221 return; /* whole block done */
224 assert (!"too many diff sequences in the block");
227 static void loadDiffs(ISAMD_PP pp)
228 { /* assumes pp is a firstblock! */
232 return; /* no diffs to talk about */
234 { /* separate diff block, load it */
235 pp->diffbuf= xmalloc( pp->is->method->filecat[pp->cat].bsize);
236 diffaddr=isamd_addr(pp->diffs/2, pp->cat);
237 isamd_read_block (pp->is, isamd_type(diffaddr),
238 isamd_block(diffaddr), pp->diffbuf );
239 diffidx= ISAMD_BLOCK_OFFSET_N;
240 if (pp->is->method->debug > 4)
241 logf(LOG_LOG,"isamd_LoadDiffs: loaded block %d=%d:%d, d=%d ix=%d",
242 diffaddr, isamd_type(diffaddr),isamd_block(diffaddr),
246 { /* integrated block, just set the pointers */
247 pp->diffbuf = pp->buf;
248 diffidx = pp->size; /* size is the beginning of diffs, diffidx the end*/
249 if (pp->is->method->debug > 4)
250 logf(LOG_LOG,"isamd_LoadDiffs: within %d=%d:%d, d=%d ix=%d ",
251 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos, pp->diffs, diffidx);
253 getDiffInfo(pp,diffidx);
257 void isamd_free_diffs(ISAMD_PP pp)
260 if (pp->is->method->debug > 5)
261 logf(LOG_LOG,"isamd_free_diffs: pp=%p di=%p", pp, pp->diffinfo);
264 for (i=1;pp->diffinfo[i].decodeData;i++)
266 if (pp->is->method->debug > 8)
267 logf(LOG_LOG,"isamd_free_diffs [%d]=%p",i,
268 pp->diffinfo[i].decodeData);
269 (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData);
272 if (pp->diffbuf != pp->buf)
274 } /* isamd_free_diffs */
277 /* Reads one item and corrects for the diffs, if any */
278 /* return 1 for ok, 0 for eof */
279 int isamd_read_item (ISAMD_PP pp, char **dst)
284 int winner=0; /* which diff holds the day */
285 int i; /* looping diffs */
288 if (pp->diffs==0) /* no diffs, just read the thing */
289 return isamd_read_main_item(pp,dst);
296 if (0==pp->diffinfo[0].key.sysno)
297 { /* 0 is special case, main data. */
298 keyptr=(char*) &(pp->diffinfo[0].key);
299 pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr);
300 if (pp->is->method->debug > 7)
301 logf(LOG_LOG,"isamd_read_item: read main %d.%d (%x.%x)",
302 pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno,
303 pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno);
304 } /* get main data */
306 for (i=1; (!retry) && (pp->diffinfo[i].decodeData); i++)
308 if (pp->is->method->debug > 8)
309 logf(LOG_LOG,"isamd_read_item: considering d%d %d.%d ix=%d mx=%d",
310 i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
311 pp->diffinfo[i].diffidx, pp->diffinfo[i].maxidx);
313 if ( (0==pp->diffinfo[i].key.sysno) &&
314 (pp->diffinfo[i].diffidx < pp->diffinfo[i].maxidx))
315 {/* read a new one, if possible */
316 codeptr= codestart = &(pp->diffbuf[pp->diffinfo[i].diffidx]);
317 keyptr=(char *)&(pp->diffinfo[i].key);
318 (*pp->is->method->code_item)(ISAMD_DECODE,
319 pp->diffinfo[i].decodeData, &keyptr, &codeptr);
320 pp->diffinfo[i].diffidx += codeptr-codestart;
321 pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1;
322 pp->diffinfo[i].key.seqno = pp->diffinfo[i].key.seqno >>1 ;
323 if (pp->is->method->debug > 7)
324 logf(LOG_LOG,"isamd_read_item: read diff[%d] %d.%d (%x.%x)",i,
325 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
326 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
328 if ( 0!= pp->diffinfo[i].key.sysno)
329 { /* got a key, compare */
330 cmp=key_compare(&pp->diffinfo[i].key, &pp->diffinfo[winner].key);
331 if (0==pp->diffinfo[winner].key.sysno)
332 cmp=-1; /* end of main sequence, take all diffs */
335 if (pp->is->method->debug > 8)
336 logf(LOG_LOG,"isamd_read_item: ins %d<%d %d.%d (%x.%x) < %d.%d (%x.%x)",
338 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
339 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
340 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
341 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
342 if (pp->diffinfo[i].mode) /* insert diff, should always be */
345 assert(!"delete diff for nonexisting item");
346 /* is an assert too steep here? Not really.*/
350 if (!pp->diffinfo[i].mode) /* delete diff. should always be */
352 if (pp->is->method->debug > 8)
353 logf(LOG_LOG,"isamd_read_item: del %d at%d %d.%d (%x.%x)",
355 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
356 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
357 pp->diffinfo[winner].key.sysno=0; /* delete it */
360 if (pp->is->method->debug > 2)
361 logf(LOG_LOG,"isamd_read_item: duplicate ins %d at%d %d.%d (%x.%x)",
363 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
364 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
365 /* skip the insert, since we already have it in the base */
366 /* Should we fail an assertion here??? */
367 pp->diffinfo[i].key.sysno=0; /* done with the delete */
368 retry=1; /* start all over again */
370 /* else it is a later key, its turn will come */
372 } /* for each diff */
375 if ( pp->diffinfo[winner].key.sysno)
377 if (pp->is->method->debug > 7)
378 logf(LOG_LOG,"isamd_read_item: got %d %d.%d (%x.%x)",
380 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
381 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
382 memcpy(*dst, &pp->diffinfo[winner].key, sizeof(struct it_key) );
383 *dst += sizeof(struct it_key);
384 pp->diffinfo[winner].key.sysno=0; /* used that one up */
389 if (pp->is->method->debug > 7)
390 logf(LOG_LOG,"isamd_read_item: eof w=%d %d.%d (%x.%x)",
392 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
393 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
394 assert(winner==0); /* if nothing found, nothing comes from a diff */
399 } /* isamd_read_item */
401 /*****************************************************************
403 *****************************************************************/
405 static void isamd_reduceblock(ISAMD_PP pp)
406 /* takes a large block, and reduces its category if possible */
407 /* Presumably the first block in an isam-list */
410 return; /* existing block, do not touch */
411 if (pp->is->method->debug > 5)
412 logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d",
413 pp->pos, pp->cat, pp->size);
414 while ( ( pp->cat > 0 ) && (!pp->next) &&
415 (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) )
417 pp->pos = isamd_alloc_block(pp->is, pp->cat);
418 if (pp->is->method->debug > 5)
419 logf(LOG_LOG,"isamd_reduce: got p=%d c=%d sz=%d",
420 pp->pos, pp->cat, pp->size);
426 static int save_first_pp ( ISAMD_PP firstpp)
428 isamd_reduceblock(firstpp);
429 isamd_buildfirstblock(firstpp);
430 isamd_write_block(firstpp->is,firstpp->cat,firstpp->pos,firstpp->buf);
431 return isamd_addr(firstpp->pos,firstpp->cat);
434 static void save_last_pp (ISAMD_PP pp)
436 pp->next = 0;/* just to be sure */
437 isamd_buildlaterblock(pp);
438 isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf);
442 static int save_both_pps (ISAMD_PP firstpp, ISAMD_PP pp)
444 /* order of things: Better to save firstpp first, if there are just two */
445 /* blocks, but last if there are blocks in between, as these have already */
446 /* been saved... optimise later (that's why this is in its own func...*/
447 int retval = save_first_pp(firstpp);
452 isamd_pp_close(firstpp);
454 } /* save_both_pps */
456 static ISAMD_PP read_diff_block(ISAMD_PP firstpp, int* p_diffidx)
457 { /* reads the diff block (if separate) and sets diffidx right */
462 { /* no diffs yet, create room for them */
463 if (separateDiffBlock(firstpp))
464 { /* create a new block */
465 pp=isamd_pp_open(pp->is,isamd_addr(0,firstpp->cat));
466 pp->pos = isamd_alloc_block(pp->is, pp->cat);
467 firstpp->diffs = pp->pos*2 +1;
468 diffidx = pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N;
469 if (pp->is->method->debug >5)
470 logf(LOG_LOG,"isamd_appd: alloc diff (d=%d) %d=%d:%d ix=%d",
472 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
476 { /* prepare to append diffs in head */
478 pp->diffs = diffidx *2 +0;
479 i=diffidx; /* make an end marker */
480 while ( ( i < pp->is->method->filecat[pp->cat].bsize) &&
481 ( i <= diffidx + sizeof(int)))
483 if (pp->is->method->debug >5)
484 logf(LOG_LOG,"isamd_appd: set up diffhead (d=%d) %d=%d:%d ix=%d",
486 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
491 { /* existing diffs */
493 { /* diffs in a separate block, load it */
494 pp=isamd_pp_open(pp->is, isamd_addr(firstpp->diffs/2,pp->cat));
495 diffidx = pp->offset= pp->size;
496 if (pp->is->method->debug >5)
497 logf(LOG_LOG,"isamd_appd: loaded diff (d=%d) %d=%d:%d ix=%d",
499 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
503 { /* diffs within the nead */
504 diffidx= pp->diffs/2;
505 if (pp->is->method->debug >5)
506 logf(LOG_LOG,"isamd_appd: diffs in head d=%d %d=%d:%d ix=%d sz=%d",
508 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
511 } /* diffs exist already */
512 *p_diffidx = diffidx;
514 } /* read_diff_block */
519 /*******************************************************************
520 * Building main blocks (no diffs)
521 *******************************************************************/
525 static ISAMD_PP get_new_main_block( ISAMD_PP firstpp, ISAMD_PP pp)
526 { /* allocates a new block for the main data, and links it in */
529 { /* special case: it was the first block. Save much later */
531 { /* firstpp not allocated yet, do so now, */
532 /* to keep blocks in order. Don't save yet, though */
533 firstpp->pos = isamd_alloc_block(pp->is, firstpp->cat);
535 newblock = isamd_alloc_block(pp->is, firstpp->cat);
536 firstpp->next = isamd_addr(newblock,firstpp->cat);
537 /* keep the largest category */
538 pp=isamd_pp_open(pp->is,isamd_addr(0,firstpp->cat));/*don't load*/
540 pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N;
542 if (pp->is->method->debug >3)
543 logf(LOG_LOG,"isamd_g_mainblk: Alloc2 f=%d=%d:%d n=%d=%d:%d",
544 isamd_addr(firstpp->pos,firstpp->cat),
545 firstpp->cat, firstpp->pos,
546 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos );
549 { /* it was not the first block */
550 newblock = isamd_alloc_block(pp->is, firstpp->cat);
551 pp->next = isamd_addr(newblock,firstpp->cat);
552 if (pp->is->method->debug >3)
553 logf(LOG_LOG,"isamd_build: Alloc1 after p=%d=%d:%d->%d=%d:%d",
554 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
555 isamd_addr(newblock,pp->cat), pp->cat, newblock );
556 isamd_buildlaterblock(pp);
557 isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf);
558 pp->size = pp->offset = ISAMD_BLOCK_OFFSET_N;
560 pp->cat = firstpp->cat;
562 pp->cat = firstpp->cat; /* is already, never mind */
565 } /* get_new_main_block */
568 static ISAMD_PP append_main_item(ISAMD_PP firstpp,
570 struct it_key *i_key,
572 { /* appends one item in the main data block, allocates new if needed */
573 char *i_item= (char *) i_key; /* same as char */
576 char *c_ptr = codebuff;
580 int maxsize = pp->is->method->filecat[pp->is->max_cat].bsize;
584 (*pp->is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
585 codelen = c_ptr - codebuff;
586 assert ( (codelen<128) && (codelen>0));
587 if (pp->is->method->debug >7)
588 logf(LOG_LOG,"isamd:build: coded into %s (nk=%d)",
589 hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1);
591 if (pp->offset + codelen > maxsize )
592 { /* oops, block full - get a new one */
593 pp = get_new_main_block( firstpp, pp );
594 /* reset encoging and code again */
595 (*pp->is->method->code_reset)(encoder_data);
598 (*pp->is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
599 codelen = c_ptr - codebuff;
600 assert ( (codelen<128) && (codelen>0));
601 if (pp->is->method->debug >7)
602 logf(LOG_LOG,"isamd:build: recoded into %s (nk=%d)",
603 hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1);
606 /* write the data into pp, now we must have room */
607 memcpy(&(pp->buf[pp->offset]),codebuff,codelen);
608 pp->offset += codelen;
611 /* clear the next 4 bytes in block, to avoid confusions with diff lens */
612 /* dirty, it should not be done here, but something slips somewhere, and */
613 /* I hope this fixes it... - Heikki */
614 codelen = pp->offset;
615 while ( (codelen < maxsize ) && (codelen <= pp->offset+4) )
616 pp->buf[codelen++] = '\0';
618 } /* append_main_item */
621 static int isamd_build_first_block(ISAMD is, ISAMD_I data)
623 struct it_key i_key; /* input key */
624 char *i_item= (char *) &i_key; /* same as char */
627 int i_mode; /* 0 for delete, 1 for insert */
635 ++(is->files[0].no_fbuilds);
637 firstpp=pp=isamd_pp_open(is, isamd_addr(0,is->max_cat));
638 firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1;
640 encoder_data=(*is->method->code_start)(ISAMD_ENCODE);
642 if (is->method->debug >2)
643 logf(LOG_LOG,"isamd_bld start: p=%d=%d:%d sz=%d maxsz=%d ",
644 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
645 pp->size, pp->is->method->filecat[pp->is->max_cat].bsize);
647 /* read first input */
649 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
651 assert( i_ptr-i_item == sizeof(i_key) );
653 if (pp->is->method->debug >7)
654 logf(LOG_LOG,"isamd: build_fi start: m=%d %s",
655 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
660 { /* ignore deletes here, should not happen */
661 pp= append_main_item(firstpp, pp, &i_key, encoder_data);
664 /* (try to) read the next item */
666 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
668 if ( (i_more) && (pp->is->method->debug >7) )
669 logf(LOG_LOG,"isamd: build_fi read: m=%d %s",
670 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
673 (*is->method->code_stop)(ISAMD_ENCODE, encoder_data);
675 return save_both_pps( firstpp, pp );
677 } /* build_first_block */
680 /***************************************************************
682 ***************************************************************/
685 static int merge ( ISAMD_PP *p_firstpp, /* first pp of the chain */
686 ISAMD_PP *p_pp, /* diff block */
687 struct it_key *p_key ) /* not used yet */
689 ISAMD_PP readpp = *p_firstpp;
695 ISAMD_PP firstpp; /* the new first, the one we write into */
699 ++(readpp->is->files[0].no_merges);
701 /* set up diffs as they should be for reading */
702 readpp->offset= ISAMD_BLOCK_OFFSET_1;
704 if ( (*p_firstpp)->diffs & 1 )
705 { /* separate diff block in *p_pp */
706 killblk = readpp->diffs/2;
707 diffidx /*size*/ = readpp->is->method->filecat[readpp->cat].bsize;
708 readpp->diffbuf= xmalloc( diffidx); /* copy diffs to where read wants*/
709 memcpy( readpp->diffbuf, &((*p_pp)->buf[0]), diffidx);
710 diffidx = ISAMD_BLOCK_OFFSET_N;
711 if (readpp->is->method->debug >2)
713 logf(LOG_LOG,"isamd_merge:separate diffs at ix=%d",
715 logf(LOG_LOG,"isamd_merge: dbuf=%p (from %p) pp=%p",
716 readpp->diffbuf, &((*p_pp)->buf[0]), (*p_pp) );
720 { /* integrated diffs */
721 assert ( *p_pp == *p_firstpp ); /* can only be in the first block */
722 diffidx=readpp->size;
723 readpp->diffs = diffidx*2+0;
724 readpp->diffbuf=readpp->buf;
725 if (readpp->is->method->debug >2)
726 logf(LOG_LOG,"isamd_merge:local diffs at %d: %s",
727 diffidx,hexdump(&(readpp->diffbuf[diffidx]),8,0));
730 getDiffInfo(readpp,diffidx);
731 if (readpp->is->method->debug >8)
732 logf(LOG_LOG,"isamd_merge: diffinfo=%p", readpp->diffinfo);
736 { /* we had a separate diff block, release it, we have copied the data */
737 isamd_release_block(readpp->is, readpp->cat, killblk);
738 isamd_pp_close (*p_pp);
739 if (readpp->is->method->debug >3)
740 logf(LOG_LOG,"isamd_merge: released diff block %d=%d:%d",
741 isamd_addr(killblk,readpp->cat), readpp->cat, killblk );
745 /* release our data block. Do before reading, when pos is stable ! */
748 isamd_release_block(readpp->is, readpp->cat, killblk);
749 if (readpp->is->method->debug >3)
750 logf(LOG_LOG,"isamd_merge: released old firstblock %d (%d:%d)",
751 isamd_addr(killblk,readpp->cat), readpp->cat, killblk );
753 r_ptr= (char *) &r_key;
754 r_more = isamd_read_item( readpp, &r_ptr);
756 { /* oops, all data has been deleted! what to do??? */
757 /* never mind, we have at least one more delta to add to the block */
758 /* pray that is not a delete as well... */
761 if (readpp->is->method->debug >3)
762 logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ",
764 assert (readpp->numKeys == 0);
768 /* set up the new blocks for simple writing */
769 firstpp=pp=isamd_pp_open(readpp->is,isamd_addr(0, readpp->is->max_cat));
770 firstpp->size = firstpp->offset = ISAMD_BLOCK_OFFSET_1;
771 encoder_data = (*pp->is->method->code_start)(ISAMD_ENCODE);
775 if (readpp->is->method->debug >6)
776 logf(LOG_LOG,"isamd_merge: got key %d.%d",
777 r_key.sysno, r_key.seqno );
778 pp= append_main_item(firstpp, pp, &r_key, encoder_data);
780 if ( (readpp->pos != killblk ) && (0!=readpp->pos) )
781 { /* pos can get to 0 at end of main seq, if still diffs left...*/
782 if (readpp->is->method->debug >3)
783 logf(LOG_LOG,"isamd_merge: released block %d (%d:%d) now %d=%d:%d",
784 isamd_addr(killblk,readpp->cat), readpp->cat, killblk,
785 isamd_addr(readpp->pos,readpp->cat),readpp->cat, readpp->pos );
786 isamd_release_block(readpp->is, readpp->cat, readpp->pos);
790 /* (try to) read next item */
791 r_ptr= (char *) &r_key;
792 r_more = isamd_read_item( readpp, &r_ptr);
796 /* set things up so that append can continue */
797 isamd_reduceblock(firstpp);
801 { /* the last data block is of no interest any more */
803 if (readpp->is->method->debug >4)
804 logf(LOG_LOG,"isamd_merge: saved last block %d=%d:%d",
805 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos);
809 if (readpp->is->method->debug >5)
810 logf(LOG_LOG,"isamd_merge: closing readpp %d=%d:%d di=%p",
811 isamd_addr(readpp->pos,readpp->cat), readpp->cat, readpp->pos,
813 isamd_pp_close(readpp); /* pos is 0 by now, at eof. close works anyway */
815 (*firstpp->is->method->code_stop)(ISAMD_ENCODE, encoder_data);
817 *p_firstpp = firstpp;
819 if (readpp->is->method->debug >2)
820 logf(LOG_LOG,"isamd_merge: merge ret %d=%d:%d nx=%d=%d:%d d=%d=2*%d+%d",
821 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
822 pp->next, isamd_type(pp->next), isamd_block(pp->next),
823 pp->diffs, pp->diffs/2, pp->diffs &1 );
830 /***************************************************************
832 ***************************************************************/
836 static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
838 struct it_key i_key; /* one input item */
839 char *i_item = (char *) &i_key; /* same as chars */
842 int i_mode; /* 0 for delete, 1 for insert */
852 char *c_ptr = codebuff;
857 ++(is->files[0].no_appds);
859 firstpp=isamd_pp_open(is, ipos);
860 if (is->method->debug >2)
861 logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d d=%d=%d*2+%d nk=%d",
862 ipos, isamd_type(ipos), isamd_block(ipos),
863 firstpp->diffs, firstpp->diffs/2, firstpp->diffs & 1, firstpp->numKeys);
864 pp=read_diff_block(firstpp, &diffidx);
865 encoder_data=(*is->method->code_start)(ISAMD_ENCODE);
866 maxsize = is->method->filecat[pp->cat].bsize;
868 difflenidx = diffidx;
869 diffidx+=sizeof(int); /* difflen will be stored here */
871 /* read first input */
873 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
875 if (is->method->debug >6)
876 logf(LOG_LOG,"isamd_appd: start with m=%d %s",
877 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
881 /* store the mode bit inside key */
882 assert( ((i_key.seqno<<1)>>1) == i_key.seqno); /* can spare the bit */
883 i_key.seqno = i_key.seqno * 2 + i_mode;
887 (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
888 codelen = c_ptr - codebuff;
889 assert ( (codelen<128) && (codelen>0));
890 if (is->method->debug >7)
891 logf(LOG_LOG,"isamd_appd: coded into %d: %s (nk=%d) (ix=%d)",
892 codelen, hexdump(codebuff, codelen,hexbuff),
893 firstpp->numKeys,diffidx);
895 if (diffidx + codelen > maxsize )
897 if (is->method->debug >3)
898 logf(LOG_LOG,"isamd_appd: block full (ix=%d mx=%d lix=%d)",
899 diffidx, maxsize, difflenidx);
900 if (is->method->debug >8)
901 logf(LOG_LOG,"isamd_appd: block pp=%p buf=%p [%d]:%s",
903 difflenidx, hexdump(&pp->buf[difflenidx],8,0));
905 ++(is->files[0].no_remerges);
906 merge_rc = merge (&firstpp, &pp, &i_key);
908 return merge_rc; /* merge handled them all ! */
910 /* set things up so we can continue */
911 pp = read_diff_block(firstpp, &diffidx);
912 (*is->method->code_reset)(encoder_data);
913 maxsize = is->method->filecat[pp->cat].bsize;
915 diffidx+=sizeof(int);
917 /* code the current input key again */
920 (*is->method->code_item)(ISAMD_ENCODE, encoder_data, &c_ptr, &i_ptr);
921 codelen = c_ptr - codebuff;
922 assert ( (codelen<128) && (codelen>0));
923 if (is->method->debug >7)
924 logf(LOG_LOG,"isamd_appd: recoded into %d: %s (nk=%d) (ix=%d)",
925 codelen, hexdump(codebuff, codelen,hexbuff),
926 firstpp->numKeys,diffidx);
930 /* Note: this goes horribly wrong if there is no room for the diff */
931 /* after the merge! The solution is to increase the limit in */
932 /* separateDiffBlock, to force a separate diff block earlier, and not */
933 /* to have absurdly small blocks */
934 assert ( diffidx+codelen <= maxsize );
937 memcpy(&(pp->buf[diffidx]),codebuff,codelen);
940 firstpp->numKeys++; /* insert diff */
942 firstpp->numKeys--; /* delete diff */
944 /* update length of this diff run */
945 memcpy(&(pp->buf[difflenidx]),&diffidx,sizeof(diffidx));
947 firstpp->diffs =diffidx*2+0;
951 /* (try to) read the next input */
953 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
954 if ( (i_more) && (is->method->debug >6) )
955 logf(LOG_LOG,"isamd_appd: got m=%d %s",
956 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
959 /* clear the next difflen, if room for such */
960 difflenidx = diffidx;
961 while ( (difflenidx-diffidx<=sizeof(int)) && (difflenidx<maxsize))
962 pp->buf[difflenidx++]='\0';
965 (*firstpp->is->method->code_stop)(ISAMD_ENCODE, encoder_data);
966 return save_both_pps( firstpp, pp );
972 /*************************************************************
973 * isamd_append itself, Sweet, isn't it
974 *************************************************************/
976 ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data)
981 retval = isamd_build_first_block(is,data);
983 retval = append_diffs(is,ipos,data);
990 #else /* NEW_ISAM_D */
991 /***************************************************************
992 ***************************************************************
993 ***************************************************************
994 ***************************************************************
995 ***************************************************************
996 ***************************************************************
997 ***************************************************************
998 ***************************************************************
999 ***************************************************************
1000 ***************************************************************
1001 ***************************************************************
1002 ***************************************************************
1003 ***************************************************************
1004 ***************************************************************
1005 ***************************************************************
1006 ***************************************************************
1007 ***************************************************************
1008 ***************************************************************/
1011 /***************************************************************
1012 * General support routines
1013 ***************************************************************/
1015 static void isamd_reduceblock(ISAMD_PP pp)
1016 /* takes a large block, and reduces its category if possible */
1017 /* Presumably the first block in an isam-list */
1020 return; /* existing block, do not touch */
1021 /* TODO: Probably we may touch anyway? */
1022 if (pp->is->method->debug > 5)
1023 logf(LOG_LOG,"isamd_reduce: start p=%d c=%d sz=%d",
1024 pp->pos, pp->cat, pp->size);
1025 while ( ( pp->cat > 0 ) && (!pp->next) &&
1026 (pp->offset < pp->is->method->filecat[pp->cat-1].bsize ) )
1028 pp->pos = isamd_alloc_block(pp->is, pp->cat);
1029 if (pp->is->method->debug > 5)
1030 logf(LOG_LOG,"isamd_reduce: got p=%d c=%d sz=%d",
1031 pp->pos, pp->cat, pp->size);
1035 static int save_first_pp ( ISAMD_PP firstpp)
1037 isamd_buildfirstblock(firstpp);
1038 isamd_write_block(firstpp->is,firstpp->cat,firstpp->pos,firstpp->buf);
1039 return isamd_addr(firstpp->pos,firstpp->cat);
1043 static void save_last_pp (ISAMD_PP pp)
1045 pp->next = 0;/* just to be sure */
1046 isamd_buildlaterblock(pp);
1047 isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf);
1051 static int save_both_pps (ISAMD_PP firstpp, ISAMD_PP pp)
1053 /* order of things: Better to save firstpp first, if there are just two */
1054 /* blocks, but last if there are blocks in between, as these have already */
1055 /* been saved... optimise later (that's why this is in its own func...*/
1056 int retval = save_first_pp(firstpp);
1061 isamd_pp_close(firstpp);
1063 } /* save_both_pps */
1068 /***************************************************************
1069 * Diffblock handling
1070 ***************************************************************/
1072 void isamd_free_diffs(ISAMD_PP pp)
1075 if (pp->is->method->debug > 5)
1076 logf(LOG_LOG,"isamd_free_diffs: pp=%p di=%p", pp, pp->diffinfo);
1079 for (i=1;pp->diffinfo[i].decodeData;i++)
1081 if (pp->is->method->debug > 8)
1082 logf(LOG_LOG,"isamd_free_diffs [%d]=%p",i,
1083 pp->diffinfo[i].decodeData);
1084 (*pp->is->method->code_stop)(ISAMD_DECODE,pp->diffinfo[i].decodeData);
1086 xfree(pp->diffinfo);
1087 if (pp->diffbuf != pp->buf)
1088 xfree (pp->diffbuf);
1091 } /* isamd_free_diffs */
1094 static void getDiffInfo(ISAMD_PP pp, int diffidx)
1095 { /* builds the diff info structures from a diffblock */
1096 int maxinfos = pp->is->method->filecat[pp->cat].bsize / 5 +1;
1097 /* Each diff takes at least 5 bytes. Probably more, but this is safe */
1098 int i=2; /* [0] is used for the main data, [1] for merge inputs */
1099 int diffsz= maxinfos * sizeof(struct ISAMD_DIFF_s);
1101 pp->diffinfo = xmalloc( diffsz );
1102 memset(pp->diffinfo,'\0',diffsz);
1103 if (pp->is->method->debug > 5)
1104 logf(LOG_LOG,"isamd_getDiffInfo: %d (%d:%d), ix=%d mx=%d",
1105 isamd_addr(pp->pos, pp->cat), pp->cat, pp->pos, diffidx,maxinfos);
1106 assert(pp->diffbuf);
1108 pp->diffinfo[0].maxidx=-1; /* mark as special */
1109 pp->diffinfo[1].maxidx=-1; /* mark as special */
1113 if ( diffidx+sizeof(int) > pp->is->method->filecat[pp->cat].bsize )
1115 if (pp->is->method->debug > 5)
1116 logf(LOG_LOG,"isamd_getDiffInfo:Near end (no room for len) at ix=%d n=%d",
1118 return; /* whole block done */
1120 memcpy( &pp->diffinfo[i].maxidx, &pp->diffbuf[diffidx], sizeof(int) );
1122 if (pp->is->method->debug > 5)
1123 logf(LOG_LOG,"isamd_getDiffInfo: max=%d ix=%d dbuf=%p",
1124 pp->diffinfo[i].maxidx, diffidx, pp->diffbuf);
1126 if ( (pp->is->method->debug > 0) &&
1127 (pp->diffinfo[i].maxidx > pp->is->method->filecat[pp->cat].bsize) )
1128 { /* bug-hunting, this fails on some long runs that log too much */
1129 logf(LOG_LOG,"Bad MaxIx!!! %s:%d: diffidx=%d",
1130 __FILE__,__LINE__, diffidx);
1131 logf(LOG_LOG,"i=%d maxix=%d bsz=%d", i, pp->diffinfo[i].maxidx,
1132 pp->is->method->filecat[pp->cat].bsize);
1133 logf(LOG_LOG,"pp=%d=%d:%d pp->nx=%d=%d:%d",
1134 isamd_addr(pp->pos,pp->cat), pp->pos, pp->cat,
1135 pp->next, isamd_type(pp->next), isamd_block(pp->next) );
1137 assert(pp->diffinfo[i].maxidx <= pp->is->method->filecat[pp->cat].bsize+1);
1139 if (0==pp->diffinfo[i].maxidx)
1141 if (pp->is->method->debug > 5) //!!! 4
1142 logf(LOG_LOG,"isamd_getDiffInfo:End mark at ix=%d n=%d",
1144 return; /* end marker */
1146 diffidx += sizeof(int);
1147 pp->diffinfo[i].decodeData = (*pp->is->method->code_start)(ISAMD_DECODE);
1148 pp->diffinfo[i].diffidx = diffidx;
1149 if (pp->is->method->debug > 5)
1150 logf(LOG_LOG,"isamd_getDiff[%d]:%d-%d %s",
1151 i,diffidx-sizeof(int),pp->diffinfo[i].maxidx,
1152 hexdump((char *)&pp->diffbuf[diffidx-4],8,0) );
1153 diffidx=pp->diffinfo[i].maxidx;
1154 if ( diffidx > pp->is->method->filecat[pp->cat].bsize )
1155 return; /* whole block done */
1158 assert (!"too many diff sequences in the block");
1161 /***************************************************************
1162 * Main block operations
1163 ***************************************************************/
1166 static ISAMD_PP get_new_main_block( ISAMD_PP firstpp, ISAMD_PP pp)
1167 { /* allocates a new block for the main data, and links it in */
1169 if (0 == firstpp->next)
1170 { /* special case, pp not yet allocated. */
1171 /*Started as largest size, that's fine */
1172 pp->pos = isamd_alloc_block(pp->is,pp->cat);
1173 firstpp->next = isamd_addr(pp->pos,pp->cat);
1174 if (pp->is->method->debug >3)
1175 logf(LOG_LOG,"isamd_build: Alloc 1. dblock p=%d=%d:%d",
1176 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos);
1178 newblock=isamd_alloc_block(pp->is,pp->cat);
1179 pp->next=isamd_addr(pp->cat,newblock);
1180 isamd_buildlaterblock(pp);
1181 isamd_write_block(pp->is,pp->cat,pp->pos,pp->buf);
1182 if (pp->is->method->debug >3)
1183 logf(LOG_LOG,"isamd_build: Alloc nxt %d=%d:%d -> %d=%d:%d",
1184 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos,
1185 isamd_addr(newblock,pp->cat), pp->cat, newblock);
1188 pp->size=pp->offset=ISAMD_BLOCK_OFFSET_N;
1190 } /* get_new_main_block */
1193 static ISAMD_PP append_main_item(ISAMD_PP firstpp,
1195 struct it_key *i_key)
1196 { /* appends one item in the main data block, allocates new if needed */
1197 char *i_item= (char *) i_key; /* same as char */
1200 char *c_ptr = codebuff;
1204 int maxsize = pp->is->method->filecat[pp->is->max_cat].bsize;
1208 (*pp->is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData,
1210 codelen = c_ptr - codebuff;
1211 assert ( (codelen<128) && (codelen>0));
1212 if (pp->is->method->debug >7)
1213 logf(LOG_LOG,"isamd:build: coded into %s (nk=%d)",
1214 hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1);
1216 if (pp->offset + codelen > maxsize )
1217 { /* oops, block full - get a new one */
1218 pp = get_new_main_block( firstpp, pp );
1219 /* reset encoging and code again */
1220 (*pp->is->method->code_reset)(pp->decodeClientData);
1223 (*pp->is->method->code_item)(ISAMD_ENCODE, pp->decodeClientData,
1225 codelen = c_ptr - codebuff;
1226 assert ( (codelen<128) && (codelen>0));
1227 if (pp->is->method->debug >7)
1228 logf(LOG_LOG,"isamd:build: recoded into %s (nk=%d)",
1229 hexdump(codebuff, c_ptr-codebuff,hexbuff), firstpp->numKeys+1);
1232 assert (pp->offset + codelen <= maxsize );
1234 /* write the data into pp, now we must have room */
1235 memcpy(&(pp->buf[pp->offset]),codebuff,codelen);
1236 pp->offset += codelen;
1237 pp->size += codelen;
1239 /* clear the next 4 bytes in block, to avoid confusions with diff lens */
1240 /* dirty, it should not be done here, but something slips somewhere, and */
1241 /* I hope this fixes it... - Heikki */
1242 codelen = pp->offset;
1243 while ( (codelen < maxsize ) && (codelen <= pp->offset+4) )
1244 pp->buf[codelen++] = '\0';
1246 } /* append_main_item */
1249 /***************************************************************
1251 ***************************************************************/
1253 static int merge ( ISAMD_PP firstpp, /* first pp (with diffs) */
1254 struct it_key *p_key, /* the data item that didn't fit*/
1255 ISAMD_I data) /* more input data comes here */
1259 struct it_key r_key;
1263 ISAMD_PP readpp=firstpp;
1265 int diffcat = firstpp->cat; /* keep the category of the diffblock even */
1266 /* if it is going to be empty now. */
1267 /* Alternative: Make it the minimal, and */
1268 /* resize later. Saves disk, but will lead */
1269 /* into bad seeks. */
1271 ++(readpp->is->files[0].no_merges);
1273 /* set up diffs as they should be for reading */
1274 diffidx = ISAMD_BLOCK_OFFSET_1;
1275 readpp->diffbuf=readpp->buf;
1276 getDiffInfo(readpp,diffidx);
1278 if (readpp->is->method->debug >4)
1279 logf(LOG_LOG,"isamd_merge: f=%d=%d:%d n=%d=%d:%d",
1280 isamd_addr(firstpp->pos,firstpp->cat), firstpp->cat, firstpp->pos,
1281 firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->next));
1283 /* release our data block. Do before reading, when pos is stable ! */
1284 killblk=firstpp->pos;
1287 isamd_release_block(firstpp->is, firstpp->cat, killblk);
1288 if (readpp->is->method->debug >3)
1289 logf(LOG_LOG,"isamd_merge: released old firstblock %d (%d:%d)",
1290 isamd_addr(killblk,firstpp->cat), firstpp->cat, killblk );
1293 /* force the read to reload the first data block at first try */
1294 readpp->offset=readpp->size+1;
1297 r_ptr= (char *) &r_key;
1298 r_more = isamd_read_item( readpp, &r_ptr);
1300 { /* oops, all data has been deleted! what to do??? */
1301 /* never mind, we have at least one more delta to add to the block */
1302 /* pray that is not a delete as well... */
1305 if (readpp->is->method->debug >5)
1306 logf(LOG_LOG,"isamd_merge:all data has been deleted (nk=%d) ",
1308 //assert (readpp->numKeys == 0); /* no longer true! */
1312 /* set up the new blocks for simple writing */
1313 firstpp=isamd_pp_open(readpp->is,isamd_addr(0, diffcat));
1314 firstpp->pos=isamd_alloc_block(firstpp->is,diffcat);
1316 pp=isamd_pp_open(readpp->is,isamd_addr(0,readpp->is->max_cat) );
1320 if (readpp->is->method->debug >6)
1321 logf(LOG_LOG,"isamd_merge: got key %d.%d",
1322 r_key.sysno, r_key.seqno );
1323 pp= append_main_item(firstpp, pp, &r_key);
1325 if ( (readpp->pos != killblk ) && (0!=readpp->pos) )
1326 { /* pos can get to 0 at end of main seq, if still diffs left...*/
1327 if (readpp->is->method->debug >3)
1328 logf(LOG_LOG,"isamd_merge: released block %d (%d:%d) now %d=%d:%d",
1329 isamd_addr(killblk,readpp->cat), readpp->cat, killblk,
1330 isamd_addr(readpp->pos,readpp->cat),readpp->cat, readpp->pos );
1331 isamd_release_block(readpp->is, readpp->cat, readpp->pos);
1332 killblk=readpp->pos;
1335 /* (try to) read next item */
1336 r_ptr= (char *) &r_key;
1337 r_more = isamd_read_item( readpp, &r_ptr);
1345 isamd_reduceblock(pp); /* reduce size if possible */
1347 if (readpp->is->method->debug >4)
1348 logf(LOG_LOG,"isamd_merge: saved last block %d=%d:%d",
1349 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos);
1352 if (readpp->is->method->debug >5)
1353 logf(LOG_LOG,"isamd_merge: closing readpp %d=%d:%d di=%p",
1354 isamd_addr(readpp->pos,readpp->cat), readpp->cat, readpp->pos,
1356 isamd_pp_close(readpp); /* pos is 0 by now, at eof. close works anyway */
1358 if (readpp->is->method->debug >2)
1359 logf(LOG_LOG,"isamd_merge: merge ret f=%d=%d:%d pp=%d=%d:%d",
1360 isamd_addr(firstpp->pos,pp->cat), firstpp->cat, firstpp->pos,
1361 isamd_addr(pp->pos,pp->cat), pp->cat, pp->pos);
1363 retval = isamd_addr(firstpp->pos, firstpp->cat);
1364 isamd_pp_close(firstpp);
1373 /***************************************************************
1375 ***************************************************************/
1377 /* Reads one item and corrects for the diffs, if any */
1378 /* return 1 for ok, 0 for eof */
1379 int isamd_read_item_merge (
1382 struct it_key *p_key, /* the data item that didn't fit*/
1383 ISAMD_I data) /* more input data comes here */
1384 { /* The last two args can be null for ordinary reads */
1388 int winner=0; /* which diff holds the day */
1389 int i; /* looping diffs */
1392 if (pp->diffs==0) /* no diffs, just read the thing */
1393 return isamd_read_main_item(pp,dst);
1396 getDiffInfo(pp, pp->offset);
1399 pp->diffinfo[1].key = *p_key; /* the key merge could not handle */
1401 pp->diffinfo[1].key.sysno=0;
1404 pp->diffinfo[1].maxidx=-1; /* signal we have diffs to read */
1406 pp->diffinfo[1].maxidx=0;
1408 pp->size=pp->offset=pp->is->method->filecat[pp->cat].bsize;
1409 /* this forces a read of the next block at first read */
1415 if (0==pp->diffinfo[0].key.sysno)
1416 { /* 0 is special case, main data. */
1417 keyptr=(char*) &(pp->diffinfo[0].key);
1418 pp->diffinfo[0].mode = ! isamd_read_main_item(pp,&keyptr);
1419 if (pp->is->method->debug > 7)
1420 logf(LOG_LOG,"isamd_read_item: read main %d.%d (%x.%x)",
1421 pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno,
1422 pp->diffinfo[0].key.sysno, pp->diffinfo[0].key.seqno);
1423 } /* get main data */
1425 if ( (0==pp->diffinfo[1].key.sysno) && (-1==pp->diffinfo[1].maxidx) );
1426 { /* 1 is another special case, the input data at merge */
1427 keyptr = (char *) &pp->diffinfo[1].key;
1428 i = (*data->read_item)(data->clientData, &keyptr, &pp->diffinfo[1].mode);
1430 { /* did not get it */
1431 pp->diffinfo[1].key.sysno=0;
1432 pp->diffinfo[1].maxidx=-2; /* stop trying */
1434 if (pp->is->method->debug >6)
1435 logf(LOG_LOG,"merge: read diff m=%d %d.%d (%x.%x)",
1436 pp->diffinfo[1].mode,
1437 pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno,
1438 pp->diffinfo[1].key.sysno, pp->diffinfo[1].key.seqno );
1439 } /* get input data */
1442 for (i=1; (!retry) && (pp->diffinfo[i].decodeData); i++)
1444 if (pp->is->method->debug > 8)
1445 logf(LOG_LOG,"isamd_read_item: considering d%d %d.%d ix=%d mx=%d",
1446 i, pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
1447 pp->diffinfo[i].diffidx, pp->diffinfo[i].maxidx);
1449 if ( (0==pp->diffinfo[i].key.sysno) &&
1450 (pp->diffinfo[i].diffidx < pp->diffinfo[i].maxidx))
1451 {/* read a new one, if possible */
1452 codeptr= codestart = &(pp->diffbuf[pp->diffinfo[i].diffidx]);
1453 keyptr=(char *)&(pp->diffinfo[i].key);
1454 (*pp->is->method->code_item)(ISAMD_DECODE,
1455 pp->diffinfo[i].decodeData, &keyptr, &codeptr);
1456 pp->diffinfo[i].diffidx += codeptr-codestart;
1457 pp->diffinfo[i].mode = pp->diffinfo[i].key.seqno & 1;
1458 pp->diffinfo[i].key.seqno = pp->diffinfo[i].key.seqno >>1 ;
1459 if (pp->is->method->debug > 7)
1460 logf(LOG_LOG,"isamd_read_item: read diff[%d] %d.%d (%x.%x)",i,
1461 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
1462 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
1464 if ( 0!= pp->diffinfo[i].key.sysno)
1465 { /* got a key, compare */
1466 cmp=key_compare(&pp->diffinfo[i].key, &pp->diffinfo[winner].key);
1467 if (0==pp->diffinfo[winner].key.sysno)
1468 cmp=-1; /* end of main sequence, take all diffs */
1471 if (pp->is->method->debug > 8)
1472 logf(LOG_LOG,"isamd_read_item: ins %d<%d %d.%d (%x.%x) < %d.%d (%x.%x)",
1474 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
1475 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
1476 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
1477 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
1478 if (pp->diffinfo[i].mode) /* insert diff, should always be */
1481 assert(!"delete diff for nonexisting item");
1482 /* is an assert too steep here? Not really.*/
1486 if (!pp->diffinfo[i].mode) /* delete diff. should always be */
1488 if (pp->is->method->debug > 8)
1489 logf(LOG_LOG,"isamd_read_item: del %d at%d %d.%d (%x.%x)",
1491 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
1492 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
1493 pp->diffinfo[winner].key.sysno=0; /* delete it */
1496 if (pp->is->method->debug > 2)
1497 logf(LOG_LOG,"isamd_read_item: duplicate ins %d at%d %d.%d (%x.%x)",
1499 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno,
1500 pp->diffinfo[i].key.sysno, pp->diffinfo[i].key.seqno);
1501 /* skip the insert, since we already have it in the base */
1502 /* Should we fail an assertion here??? */
1503 pp->diffinfo[i].key.sysno=0; /* done with the delete */
1504 retry=1; /* start all over again */
1505 } /* matching key */
1506 /* else it is a later key, its turn will come */
1508 } /* for each diff */
1511 if ( pp->diffinfo[winner].key.sysno)
1513 if (pp->is->method->debug > 7)
1514 logf(LOG_LOG,"isamd_read_item: got %d %d.%d (%x.%x)",
1516 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
1517 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
1518 memcpy(*dst, &pp->diffinfo[winner].key, sizeof(struct it_key) );
1519 *dst += sizeof(struct it_key);
1520 pp->diffinfo[winner].key.sysno=0; /* used that one up */
1525 if (pp->is->method->debug > 7)
1526 logf(LOG_LOG,"isamd_read_item: eof w=%d %d.%d (%x.%x)",
1528 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno,
1529 pp->diffinfo[winner].key.sysno, pp->diffinfo[winner].key.seqno);
1530 assert(winner==0); /* if nothing found, nothing comes from a diff */
1535 } /* isamd_read_item */
1538 int isamd_read_item (ISAMD_PP pp, char **dst)
1540 return isamd_read_item_merge(pp,dst,0,0);
1544 /***************************************************************
1546 ***************************************************************/
1550 static int append_diffs(ISAMD is, ISAMD_P ipos, ISAMD_I data)
1552 struct it_key i_key; /* one input item */
1553 char *i_item = (char *) &i_key; /* same as chars */
1556 int i_mode; /* 0 for delete, 1 for insert */
1564 char *c_ptr = codebuff;
1571 firstpp=isamd_pp_open(is, isamd_addr(0,0) );
1572 firstpp->size=firstpp->offset=ISAMD_BLOCK_OFFSET_1;
1573 /* create in smallest category, will expand later */
1574 ++(is->files[0].no_fbuilds);
1578 firstpp=isamd_pp_open(is, ipos);
1579 ++(is->files[0].no_appds);
1582 if (is->method->debug >2)
1583 logf(LOG_LOG,"isamd_appd: Start ipos=%d=%d:%d n=%d=%d:%d nk=%d",
1584 ipos, isamd_type(ipos), isamd_block(ipos),
1585 firstpp->next, isamd_type(firstpp->next), isamd_block(firstpp->diffs),
1587 maxsize = is->method->filecat[firstpp->cat].bsize;
1589 difflenidx = diffidx = firstpp->size;
1591 diffidx+=sizeof(int); /* difflen will be stored here */
1593 /* read first input */
1595 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
1597 if (is->method->debug >6)
1598 logf(LOG_LOG,"isamd_appd: start with m=%d %s",
1599 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
1603 /* store the mode bit inside key */
1604 assert( ((i_key.seqno<<1)>>1) == i_key.seqno); /* can spare the bit */
1605 i_key.seqno = i_key.seqno * 2 + i_mode;
1609 (*is->method->code_item)(ISAMD_ENCODE, firstpp->decodeClientData,
1611 codelen = c_ptr - codebuff;
1612 assert ( (codelen<128) && (codelen>0));
1613 if (is->method->debug >7)
1614 logf(LOG_LOG,"isamd_appd: coded into %d: %s (nk=%d) (ix=%d)",
1615 codelen, hexdump(codebuff, codelen,hexbuff),
1616 firstpp->numKeys,diffidx);
1618 if (diffidx + codelen > maxsize )
1620 if (firstpp->cat < firstpp->is->max_cat)
1621 { /* just increase the block size */
1622 if (firstpp->pos > 0) /* free the old block if allocated */
1623 isamd_release_block(is, firstpp->cat, firstpp->pos);
1625 maxsize = is->method->filecat[firstpp->cat].bsize;
1626 firstpp->pos=0; /* need to allocate it when saving */
1627 if (is->method->debug >3)
1628 logf(LOG_LOG,"isamd_appd: increased diff block to %d (%d)",
1629 firstpp->cat, maxsize);
1632 { /* max size already - can't help, need to merge it */
1633 merge_rc = merge (firstpp, &i_key, data);
1635 return merge_rc; /* merge handled them all ! */
1636 assert(!"merge returned zero ??");
1637 } /* need to merge */
1640 assert ( diffidx+codelen <= maxsize );
1643 memcpy(&(firstpp->buf[diffidx]),codebuff,codelen);
1645 firstpp->size += codelen;
1646 firstpp->offset +=codelen;
1649 firstpp->numKeys++; /* insert diff */
1651 firstpp->numKeys--; /* delete diff */
1653 /* update length of this diff run */
1654 memcpy(&(firstpp->buf[difflenidx]),&diffidx,sizeof(diffidx));
1656 /* (try to) read the next input */
1658 i_more = (*data->read_item)(data->clientData, &i_ptr, &i_mode);
1659 if ( (i_more) && (is->method->debug >6) )
1660 logf(LOG_LOG,"isamd_appd: got m=%d %s",
1661 i_mode, hexdump(i_item,i_ptr-i_item,hexbuff) );
1664 /* clear the next difflen, if room for such */
1665 difflenidx = diffidx;
1666 while ( (difflenidx-diffidx<=sizeof(int)) && (difflenidx<maxsize))
1667 firstpp->buf[difflenidx++]='\0';
1669 if (0==firstpp->pos) /* need to (re)alloc the block */
1670 firstpp->pos = isamd_alloc_block(is, firstpp->cat);
1672 retval = save_first_pp( firstpp );
1673 isamd_pp_close(firstpp);
1676 } /* append_diffs */
1681 /*************************************************************
1682 * isamd_append itself, Sweet, isn't it
1683 *************************************************************/
1685 ISAMD_P isamd_append (ISAMD is, ISAMD_P ipos, ISAMD_I data)
1687 return append_diffs(is,ipos,data);
1688 } /* isamd_append */
1694 #endif /* NEW_ISAM_D */
1699 * $Log: merge-d.c,v $
1700 * Revision 1.18 1999-08-25 18:09:24 heikki
1701 * Starting to optimize
1703 * Revision 1.17 1999/08/24 13:17:42 heikki
1704 * Block sizes, comments
1706 * Revision 1.16 1999/08/24 10:12:02 heikki
1707 * Comments about optimising
1709 * Revision 1.15 1999/08/22 08:26:34 heikki
1712 * Revision 1.14 1999/08/20 12:25:58 heikki
1713 * Statistics in isamd
1715 * Revision 1.13 1999/08/18 13:59:19 heikki
1716 * Fixed another unlikely difflen bug
1718 * Revision 1.12 1999/08/18 13:28:17 heikki
1719 * Set log levels to decent values
1721 * Revision 1.11 1999/08/18 10:37:11 heikki
1722 * Fixed (another) difflen bug
1724 * Revision 1.10 1999/08/18 09:13:31 heikki
1727 * Revision 1.9 1999/08/17 19:46:53 heikki
1728 * Fixed a memory leak
1730 * Revision 1.8 1999/08/07 11:30:59 heikki
1731 * Bug fixing (still a mem leak somewhere)
1733 * Revision 1.7 1999/08/04 14:21:18 heikki
1734 * isam-d seems to be working.
1736 * Revision 1.6 1999/07/23 15:43:05 heikki
1737 * Hunted a few bugs in isam-d. Still crashes on the long test run
1739 * Revision 1.5 1999/07/23 13:58:52 heikki
1740 * merged closer to working, still fails on filling a separate, large block
1742 * Revision 1.4 1999/07/21 14:53:55 heikki
1743 * isamd read and write functions work, except when block full
1744 * Merge missing still. Need to split some functions
1746 * Revision 1.1 1999/07/14 13:14:47 heikki