
887 mustbe0(xdl_recs_cmp(&dd1, 0, dd1.nrec, &dd2, 0, dd2.nrec, 888 kvdf, kvdb, (xp.flags & XDF_NEED_MINIMAL) != 0, 889 &xenv));
878 dd1.nrec = xe->xdf1.nreff; // this is the number of lines in file 1 879 dd1.ha = xe->xdf1.ha; // this is the "hash" of every line. it's actually an index into a hash list, same thing 880 dd1.rchg = xe->xdf1.rchg; // this is the change vector -- the algorithm sets 1 to indicate the line does not match 881 dd1.rindex = xe->xdf1.rindex; // this is the vector of pointers to line data including the actual content
(gdb) s xdl_recs_cmp (dd1=0x7ffff4b08880, off1=0, lim1=2, dd2=0x7ffff4b088c0, off2=0, lim2=1, kvdf=0x50c000000290, kvdb=0x50c0000002c0, need_min=1, xenv=0x7ffff4b08840) at /home/karl3/projects/zinc/third_party/xdiff/xdiffi.c:259 259 unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha; (gdb) list 254 * (marking changed lines) is done in the two boundary reaching checks. 255 */ 256 int xdl_recs_cmp(diffdata_t *dd1, long off1, long lim1, 257 diffdata_t *dd2, long off2, long lim2, 258 long *kvdf, long *kvdb, int need_min, xdalgoenv_t *xenv) { 259 unsigned long const *ha1 = dd1->ha, *ha2 = dd2->ha; 260 261 /*
262 * Shrink the box by walking through each diagonal snake (SW and NE). 263 */ (gdb) list 264 for (; off1 < lim1 && off2 < lim2 && ha1[off1] == ha2[off2]; off1++, off2++); 265 for (; off1 < lim1 && off2 < lim2 && ha1[lim1 - 1] == ha2[lim2 - 1]; lim1--, lim2--); 266 267 /* 268 * If one dimension is empty, then all records on the other one must 269 * be obviously changed. 270 */ 271 if (off1 == lim1) { 272 char *rchg2 = dd2->rchg; 273 long *rindex2 = dd2->rindex; (gdb) list 274 275 for (; off2 < lim2; off2++) 276 rchg2[rindex2[off2]] = 1; 277 } else if (off2 == lim2) { 278 char *rchg1 = dd1->rchg; 279 long *rindex1 = dd1->rindex; 280 281 for (; off1 < lim1; off1++) 282 rchg1[rindex1[off1]] = 1; 283 } else { (gdb) list 284 xdpsplit_t spl; 285 spl.i1 = spl.i2 = 0; 286 287 /* 288 * Divide ... 289 */ 290 if (xdl_split(ha1, off1, lim1, ha2, off2, lim2, kvdf, kvdb, 291 need_min, &spl, xenv) < 0) { 292 293 return -1; (gdb) list 294 } 295 296 /* 297 * ... et Impera. 298 */ 299 if (xdl_recs_cmp(dd1, off1, spl.i1, dd2, off2, spl.i2, 300 kvdf, kvdb, spl.min_lo, xenv) < 0 || 301 xdl_recs_cmp(dd1, spl.i1, lim1, dd2, spl.i2, lim2, 302 kvdf, kvdb, spl.min_hi, xenv) < 0) { 303 (gdb) list 304 return -1; 305 } 306 } 307 308 return 0; 309 }
So in lines 264 and 265 above, it's looking for matching lines at the start and at the end of the two ranges, incrementing off1/off2 and decrementing lim1/lim2 to narrow the problem domain.
After these lines it runs two quick checks: if off and lim end up touching for either file (i.e. that side of the box is empty), then every remaining record on the other side is marked as changed. I expect we won't hit that condition, as the window is only partly filled: the ending lines will mismatch, so the condition ha1[lim1 - 1] == ha2[lim2 - 1] will be false.
i guess i'd better step into that and verify i'm right!
So it turns out I'm _wrong_. Although the second loop indeed doesn't exhaust, because: (gdb) p ha1[lim1 - 1] $16 = 2 (gdb) p ha2[lim2 - 1] $17 = 1 the first loop does exhaust file 2 (off2 reaches lim2), so this condition is still met: 277 } else if (off2 == lim2) { which then causes the associated block to execute: 278 char *rchg1 = dd1->rchg; 279 long *rindex1 = dd1->rindex; 280 281 for (; off1 < lim1; off1++) 282 rchg1[rindex1[off1]] = 1; and then that's the end of the function! 308 return 0; 309 } So basically all the function did was assign 1 to rchg1[rindex1[i]] for each i from off1 up to lim1 - 1, and that's it! It's completely relying on rchg being filled with zeros before it's called :D That's definitely the issue here ... or at least it looks like it to me now ... Hopefully if I zero rchg first, then this assertion will pass and a new one will fail later. I'm expecting they won't all pass, because the streaming window is too small right now, but I do want it to produce the best output it can find given that limit.