/*
 * call-seq:
 *   SuffixArray.new(source) -> SuffixArray
 * 
 * Given a string (or anything that converts to a string via StringValue)
 * this builds a suffix array over its bytes so that you can work with it.
 *
 * Besides the suffix index itself, the constructor records:
 * - the value returned by bsarray() in the @suffix_start instance variable;
 * - for every byte value that begins at least one suffix, the first and
 *   last positions in the suffix index of the run of suffixes beginning
 *   with that byte (the starts/ends tables of the underlying struct).
 *
 * Raises the extension's error class if the source is an empty string,
 * since a suffix array over nothing is a useless operation, and
 * NoMemoryError if the index buffer cannot be allocated.
 */
static VALUE SuffixArray_initialize(VALUE self, VALUE source) 
{
    SuffixArray *sa = NULL;
    size_t i = 0;
    Data_Get_Struct(self, SuffixArray, sa);
    assert(sa != NULL);
    
    // convert the argument to a String and keep it in our structure
    sa->source = StringValue(source);
    
    // local shortcuts for the source bytes and their count
    unsigned char *sa_source = (unsigned char *)RSTRING(sa->source)->ptr;
    size_t sa_source_len = RSTRING(sa->source)->len;
    
    if(sa_source_len == 0) {
        // an empty source is not allowed; rb_raise does not return
        rb_raise(cSAError, ERR_NO_ZERO_LENGTH_INPUT);
    }
    
    // allocate memory for the index integers (one more slot than there are bytes)
    // NOTE(review): if initialize can run twice on the same object, a previous
    // suffix_index buffer would be leaked here -- confirm against the allocator.
    sa->suffix_index = malloc(sizeof(int) * (sa_source_len + 1));
    if(sa->suffix_index == NULL) {
        // without this check a NULL buffer would be handed to bsarray and
        // dereferenced below
        rb_raise(rb_eNoMemError, "failed to allocate memory for the suffix index");
    }
    
    // create the suffix array from the source
    // NOTE(review): bsarray is given len-1 rather than len, and its return
    // value is stored unchecked -- confirm both against bsarray's contract.
    int start = bsarray(sa_source, sa->suffix_index, sa_source_len-1);

    // set the suffix_start in our object
    rb_iv_set(self, "@suffix_start", INT2NUM(start));
    
    // Walk the suffix index and record, per leading byte, where its run of
    // suffixes begins and ends (presumably the index is sorted, so suffixes
    // sharing a leading byte are contiguous).
    unsigned char c = sa_source[sa->suffix_index[0]];  // leading byte of the first suffix
    sa->starts[c] = 0;
    for(i = 0; i < sa_source_len; i++) {
        unsigned char current = sa_source[sa->suffix_index[i]];
        // skip entries until the leading byte changes
        if(current != c) {
            sa->ends[c] = i-1; // the previous run ended just behind this point
            c = current;
            sa->starts[c] = i;
        }
    }
    // close the final run; the loop only closes a run when the next one starts,
    // and c already holds the leading byte of the last suffix at this point
    sa->ends[c] = sa_source_len-1;
    
    return INT2FIX(sa_source_len);
}