/*
  :call-seq:
  Dvector.fast_fancy_read(stream, options) => Array_of_Dvectors
  
  Reads data from an IO stream and separate it into columns of data
  according to the _options_, a hash holding the following elements
  (compulsory, but you can use FANCY_READ_DEFAULTS):
  * 'sep': a regular expression that separate the entries
  * 'comments': any line matching this will be skipped
  * 'skip_first': skips that many lines before reading anything
  * 'index_col': if true, the first column returned contains the
    number of the line read
  * 'remove_space': whether to remove spaces at the beginning of a line. *This 
    option is currently not implemented !*
  * 'default':  what to put when nothing was found but a number must be used

  As a side note, the read time is highly non-linear, which suggests that
  the read is memory-allocation/copying-limited, at least for big files.
  Well, the read time is non-linear for 


  An internal memory allocation with aggressive policy should solve that,
  that is, not using directly Dvectors (and it would be way faster to store
  anyway).
*/
static VALUE dvector_fast_fancy_read(VALUE self, VALUE stream, VALUE options)
{
  /* First, we read up options: */
  double def = rb_num2dbl(rb_hash_aref(options, 
                                       rb_str_new2("default")));
  int remove_space = RTEST(rb_hash_aref(options, 
                                        rb_str_new2("remove_space")));
  int index_col = RTEST(rb_hash_aref(options, 
                                     rb_str_new2("index_col")));
  long skip_first = FIX2LONG(rb_hash_aref(options, 
                                          rb_str_new2("skip_first")));
  VALUE sep = rb_hash_aref(options, rb_str_new2("sep"));
  VALUE comments = rb_hash_aref(options, rb_str_new2("comments"));

  /* Then, some various variables: */
  VALUE line;

  ID chomp_id = rb_intern("chomp!");
  ID gets_id = rb_intern("gets");
  long line_number = 0;

  /* 
     Now come the fun part - rudimentary vectors management 
   */
  int nb_vectors = 0;           /* The number of vectors currently created */
  int current_size = 10;        /* The number of slots available */
  double ** vectors = ALLOC_N(double *, current_size);
  long index = 0;               /* The current index in the vectors */
  int allocated_size = 5004;    /* The size available in the vectors */


  int i;

  /* The return value */
  VALUE ary;

  /* We use a real gets so we can also rely on StringIO, for instance */
  while(RTEST(line = rb_funcall(stream, gets_id, 0))) {
    VALUE pre, post, match;
    const char * line_ptr;
    int col = 0;
    line_number++;
    /* Whether we should skip the line... */
    if(skip_first >= line_number)
      continue;

    /* We check for a blank line using isspace: */
    line_ptr = StringValueCStr(line);
    while(line_ptr && *line_ptr) {
      if(! isspace(*line_ptr))
        break;
      line_ptr++;
    }
    if(! *line_ptr)
      continue;                 /* We found a blank line  */
    if(remove_space)            /* We replace the contents of the line  */
      line = rb_str_new2(line_ptr);

    /* ... or a comment line */
    if(RTEST(comments) && RTEST(rb_reg_match(comments, line))) 
      continue;

    /* Then, we remove the newline: */
    post = line;
    rb_funcall(post, chomp_id, 0);

    /* We iterate over the different portions between
       matches
    */
    while(RTEST(post)) {
      const char * a;
      char * b;
      if(RTEST(rb_reg_match(sep, post))) {
        match = rb_gv_get("$~");
        pre = rb_reg_match_pre(match);
        post = rb_reg_match_post(match);
      }
      else {
        pre = post;
        post = Qnil;
      }
      a = StringValueCStr(pre);
      double c = strtod(a, &b);
      if(b == a) 
        c = def;
      if(col >= nb_vectors) {
        nb_vectors++;
        /* We need to create a new vector */
        if(col >= current_size) { /* Increase the available size */
          current_size += 5;
          REALLOC_N(vectors, double * , current_size);
        }
        
        double * vals = vectors[col] = ALLOC_N(double, allocated_size);
        /* Filling it with the default value */
        for(i = 0; i < index; i++) {
          vals[i] = def;
        }
      }
      vectors[col][index] = c;
      col++;
    }
    /* Now, we finish the line */
    for(; col < nb_vectors; col++)
      vectors[col][index] = def;
    index++;
    /* Now, we reallocate memory if necessary */
    if(index >= allocated_size) {
      allocated_size *= 2;      /* We double the size */
      for(col = 0; col < nb_vectors; col++)
        REALLOC_N(vectors[col], double, allocated_size);
    }
  }
  /* Now, we make up the array */
  ary = rb_ary_new();
  for(i = 0; i < nb_vectors; i++) {
    /* We create a vector */
    rb_ary_store(ary, i, make_dvector_from_data(cDvector, index, vectors[i]));
    /* And free the memory */
    free(vectors[i]);
  }
  free(vectors);
  return ary;
}