
which then causes the associated block to execute: 278 char *rchg1 = dd1->rchg; 279 long *rindex1 = dd1->rindex; 280 281 for (; off1 < lim1; off1++) 282 rchg1[rindex1[off1]] = 1; and then that's the end of the function! 308 return 0; 309 }
It's completely relying on rchg to be filled with zeros before it's called :D that's definitely the issue here. ... or it looks like it to me now ... hopefully if i do that, then this assertion will pass and a new one will rise later.
Ok! So indeed wiping rchg did make that assertion pass. @@ -839,6 +842,9 @@ private: void do_diff() { auto xe = &this->xe; + // for now rchg is wiped as the git/xdiff algorithms assume this + std::memset(xe->xdf1.rchg - 1, 0, (size_t)(xe->xdf1.nrec + 2) * sizeof(*xe->xdf1.rchg)); + std::memset(xe->xdf2.rchg - 1, 0, (size_t)(xe->xdf2.nrec + 2) * sizeof(*xe->xdf2.rchg)); switch (XDF_DIFF_ALG(xp.flags)) { case XDF_PATIENCE_DIFF: mustbe0(xdl_do_patience_diff(&xp, xe)); The next bug was a coding mistake in one of the consume loops, I wasn't walking the linked list resulting in iterating the same node forever: @@ -362,17 +362,20 @@ public: /* if the classifier entry hash been consumed, recover it, then classify the line. otherwise, classify the line, then update the line pointer in case old lines are consumed. */ auto rchash_hi = (long) XDL_HASHLONG(crec->ha, cf->hbits); - auto * rcrec_link = &cf.consumed_rchash[(size_t)rchash_hi]; + xdlclass_t ** prev; xdlclass_t * rcrec; - while (*rcrec_link) { - rcrec = *rcrec_link; + for ( + prev = &cf.consumed_rchash[(size_t)rchash_hi]; + (rcrec = *prev); + prev = &(*prev)->next + ) { if (rcrec->ha == crec->ha && rcrec->size == crec->size) { break; }; } - if (*rcrec_link) { + if (rcrec) { assert(rcrec->next == nullptr && "hash collision in consumed lines; should check for existing matching entry before using consumed entry"); - *rcrec_link = rcrec->next; + *prev = rcrec->next; rcrec->line = crec->ptr; That's two bugs fixed! The next bug is a super-fun one: _a memory error_. I have so many memory checks going on for this code. Here we have it: $ ./diff_xdiff [28/1799] diff_main: Null case. WINDOW RESIZE0=>2 consuming rec ptr 0x7f2942e00a00 consuming rec ptr 0x502000000250 WINDOW RESIZE2=>4 consuming rec ptr 0x7f2942e00a02 consuming rec ptr 0x502000000312 WINDOW RESIZE4=>8 consuming rec ptr 0x7f2942e00a04 consuming rec ptr 0x502000000334 diff_main: Equality. WINDOW RESIZE0=>2 WINDOW RESIZE2=>4 WINDOW RESIZE4=>8 consuming rec ptr 0x7f2942e01200 consuming rec ptr 0x502000000550 consuming rec ptr 0x7f2942e01202 consuming rec ptr 0x502000000552 WINDOW RESIZE8=>16 consuming rec ptr 0x7f2942e01204 ================================================================= ==15016==ERROR: AddressSanitizer: heap-use-after-free on address 0x5070000002f8 at pc 0x55d895dbd0a9 bp 0x7ffeba337450 sp 0x7ffeba337448 READ of size 8 at 0x5070000002f8 thread T0 #0 0x55d895dbd0a8 in AsymmetricStreamingXDiff::assert_no_dangling_pointers() /home/karl3/projects/zinc/src/diff_xdiff.cpp:765 #1 0x55d895dbf402 in AsymmetricStreamingXDiff::extend_env(std::basic_string_view<char, std::char_traits<char> >) /home/karl3/projects/zinc/src/diff_xdiff.cpp:818 #2 0x55d895d9667c in diff /home/karl3/projects/zinc/src/diff_xdiff.cpp:695 #3 0x55d895dec067 in std::__n4861::coroutine_handle<zinc::__generator_promise_base<Diff, zinc::generator<Diff, Diff, zinc::use_allocator_arg> > >::resume() const /usr/include/c++/12/coroutine:244 #4 0x55d895dddad3 in zinc::__generator_promise_base<Diff, zinc::generator<Diff, Diff, zinc::use_allocator_arg> >::resume() ../include/zinc/__generator.hpp:548 #5 0x55d895dcf32d in zinc::generator<Diff, Diff, zinc::use_allocator_arg>::iterator::operator++() ../include/zinc/__generator.hpp:822 #6 0x55d895d9b7d3 in diff_main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1029 #7 0x55d895d9dbc8 in test_diff_xdiff() /home/karl3/projects/zinc/src/diff_xdiff.cpp:1074 #8 0x55d895da4928 in main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1108 #9 0x7f2944a33d67 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 #10 0x7f2944a33e24 in __libc_start_main_impl ../csu/libc-start.c:360 #11 0x55d895d8c640 in _start (/home/karl3/projects/zinc/src/diff_xdiff+0xb4640) (BuildId: 1dcce094fc42295839b19dfd2bae3a3a7afd2442) 0x5070000002f8 is located 56 bytes inside of 80-byte region [0x5070000002c0,0x507000000310) freed by thread T0 here: #0 0x7f29458f3918 in free ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:52 #1 0x55d895db5cb7 in DynamicXDFile::cha_consume_(long) /home/karl3/projects/zinc/src/diff_xdiff.cpp:554 #2 0x55d895db331e in DynamicXDFile::consume(long) /home/karl3/projects/zinc/src/diff_xdiff.cpp:463 #3 0x55d895d96edf in diff /home/karl3/projects/zinc/src/diff_xdiff.cpp:701 #4 0x55d895dec067 in std::__n4861::coroutine_handle<zinc::__generator_promise_base<Diff, zinc::generator<Diff, Diff, zinc::use_allocator_arg> > >::resume() const /usr/include/c++/12/coroutine:244 #5 0x55d895dddad3 in zinc::__generator_promise_base<Diff, zinc::generator<Diff, Diff, zinc::use_allocator_arg> >::resume() ../include/zinc/__generator.hpp:548 #6 0x55d895dcf32d in zinc::generator<Diff, Diff, zinc::use_allocator_arg>::iterator::operator++() ../include/zinc/__generator.hpp:822 #7 0x55d895d9b7d3 in diff_main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1029 #8 0x55d895d9dbc8 in test_diff_xdiff() /home/karl3/projects/zinc/src/diff_xdiff.cpp:1074 #9 0x55d895da4928 in main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1108 #10 0x7f2944a33d67 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 previously allocated by thread T0 here: #0 0x7f29458f4c77 in malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69 #1 0x55d895e12542 in xdl_cha_alloc /home/karl3/projects/zinc/third_party/xdiff/xutils.c:101 #2 0x55d895dae6d4 in DynamicXDFile::extend_pre(s_xpparam*, long, std::basic_string_view<char, std::char_traits<char> >) /home/karl3/projects/zinc/src/diff_xdiff.cpp:357 #3 0x55d895db947b in AsymmetricStreamingXDiff::AsymmetricStreamingXDiff(std::basic_string_view<char, std::char_traits<char> >, long, unsigned long) /home/karl3/projects/zinc/src/diff_xdiff.cpp:668 #4 0x55d895d9ab24 in diff_main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1018 #5 0x55d895d9dbc8 in test_diff_xdiff() /home/karl3/projects/zinc/src/diff_xdiff.cpp:1074 #6 0x55d895da4928 in main /home/karl3/projects/zinc/src/diff_xdiff.cpp:1108 #7 0x7f2944a33d67 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 SUMMARY: AddressSanitizer: heap-use-after-free /home/karl3/projects/zinc/src/diff_xdiff.cpp:765 in AsymmetricStreamingXDiff::assert_no_dangling_pointers() One of the fun things about these memory errors (now in these modern times i seem to have stumbled on when they actually get reported) is that there are three separate stack traces: one for violation, one for deallocation, and one for allocation.