/*
 * call-seq:
 *   sarray.longest_nonmatch(target, from_index, min_match) -> [non_match_length, match_start, match_length]
 *
 * Mostly the inverse of longest_match, except that it first tries to find a
 * non-matching region, then a matching region.  The target and from_index are
 * the same as in longest_match.  The min_match argument is the length a matching
 * region must exceed to be significant enough to end the non-matching search.
 * Giving min_match=0 will stop at the first matching region.  A negative
 * min_match is treated as 0.
 *
 * It works by first searching the suffix array for a non-matching region.  When it
 * hits a character that is in the source (according to the suffix array) it tries
 * to find a matching region.  If it can find a matching region that is longer than min_match
 * then it stops and returns, otherwise it adds this match to the length of the non-matching
 * region and continues.
 *
 * The return value is an Array of [non_match_length, match_start, match_length].
 * non_match_length is counted from from_index.  If the end of target is reached
 * without finding an acceptable match, match_start and match_length are both 0.
 *
 * Returns nil if from_index is greater than the length of target.  Raises the
 * suffix array error (cSAError) if the suffix array has not been initialized
 * or its source is empty.
 */
static VALUE SuffixArray_longest_nonmatch(VALUE self, VALUE target, VALUE from_index, VALUE min_match)
{
    SuffixArray *sa = NULL;
    Data_Get_Struct(self, SuffixArray, sa);

    if(sa == NULL || sa->suffix_index == NULL || RSTRING(sa->source)->len == 0) {
        rb_raise(cSAError, ERR_NOT_INITIALIZED);
    }

    // starting offset into the target
    size_t from = NUM2UINT(from_index);

    // Read min_match as a signed value first: storing a negative number directly
    // into a size_t would wrap it to a huge threshold that no match could ever
    // exceed.  Every match is longer than a negative threshold, so clamp to 0.
    long min_arg = NUM2INT(min_match);
    size_t min = (min_arg < 0) ? 0 : (size_t)min_arg;

    // get better pointers for the source (should already be in String form)
    unsigned char *source_ptr = (unsigned char *)RSTRING(sa->source)->ptr;
    size_t source_len = RSTRING(sa->source)->len;

    // make sure the target is a String; StringValue converts target in place
    StringValue(target);

    // better pointers again
    unsigned char *target_ptr = (unsigned char *)RSTRING(target)->ptr;
    size_t target_len = RSTRING(target)->len;

    // check the input for validity, returning nil like in array operations
    if(from > target_len) {
        return Qnil;
    }

    // scan walks the target from the requested offset up to its end
    unsigned char *scan = target_ptr + from;
    unsigned char *end = target_ptr + target_len;
    size_t match_len = 0;
    size_t match_start = 0;

    while(scan < end) {
        if(*scan != source_ptr[sa->suffix_index[sa->starts[*scan]]]) {
            // this character does not begin any suffix of the source, so it
            // belongs to the non-matching region
            scan++;
        } else {
            // match_len is an in/out parameter: pass in how much of the target
            // remains, get back the length of the match that was found
            match_len = (size_t)(end - scan);
            match_start = find_longest_match(source_ptr, source_len, scan, &match_len,
                                             sa->starts, sa->ends, sa->suffix_index);

            if(match_len == 0) {
                // match not found, which really shouldn't happen; stop here
                // rather than spin forever without advancing scan
                break;
            } else if(match_len > min) {
                // the match is long enough to end the non-matching region
                break;
            } else {
                // the match is too short to be significant, so fold it into
                // the non-matching region and keep scanning after it
                scan += match_len;
                // reset to 0 to signal that a match hasn't been found yet
                match_len = match_start = 0;
            }
        }
    }

    // everything between the starting offset and scan did not match
    size_t nonmatch_len = (size_t)(scan - (target_ptr + from));

    VALUE result = rb_ary_new();
    rb_ary_push(result, INT2FIX(nonmatch_len));
    rb_ary_push(result, INT2FIX(match_start));
    rb_ary_push(result, INT2FIX(match_len));

    return result;
}