diff --git a/src/cut.c b/src/cut.c
index 8738c46..5a84a99 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -109,6 +109,85 @@ static char *field_1_buffer;
static size_t field_1_bufsize;
+// Taken from the old version ============================================
+
+/* The largest field or byte index used as an endpoint of a closed
+ or degenerate range specification; this doesn't include the starting
+ index of right-open-ended ranges. For example, with either range spec
+ '2-5,9-', '2-3,5,9-' this variable would be set to 5. */
+static size_t max_range_endpoint;
+
+/* If nonzero, this is the index of the first field in a range that goes
+ to end of line. */
+static size_t eol_range_start;
+
+/* This is a bit vector.
+ In byte mode, which bytes to output.
+ In field mode, which DELIM-separated fields to output.
+ Both bytes and fields are numbered starting with 1,
+ so the zeroth bit of this array is unused.
+ A field or byte K has been selected if
+ (K <= MAX_RANGE_ENDPOINT and is_printable_field(K))
+ || (EOL_RANGE_START > 0 && K >= EOL_RANGE_START). */
+static unsigned char *printable_field;
+
+#define HT_RANGE_START_INDEX_INITIAL_CAPACITY 31
+
+/* The set of range-start indices. For example, given a range-spec list like
+ '-b1,3-5,4-9,15-', the following indices will be recorded here: 1, 3, 15.
+ Note that although '4' looks like a range-start index, it is in the middle
+ of the '3-5' range, so it doesn't count.
+ This table is created/used IFF output_delimiter_specified is set. */
+static Hash_table *range_start_ht;
+
+static inline void
+mark_range_start (size_t i)
+{
+ /* Record the fact that 'i' is a range-start index. */
+ void *ent_from_table = hash_insert (range_start_ht, (void*) i);
+ if (ent_from_table == NULL)
+ {
+ /* Insertion failed due to lack of memory. */
+ xalloc_die ();
+ }
+ assert ((size_t) ent_from_table == i);
+}
+
+static inline void
+mark_printable_field (size_t i)
+{
+ size_t n = i / CHAR_BIT;
+ printable_field[n] |= (1 << (i % CHAR_BIT));
+}
+
+static inline bool
+is_printable_field (size_t i)
+{
+ size_t n = i / CHAR_BIT;
+ return (printable_field[n] >> (i % CHAR_BIT)) & 1;
+}
+
+static size_t
+hash_int (const void *x, size_t tablesize)
+{
+#ifdef UINTPTR_MAX
+ uintptr_t y = (uintptr_t) x;
+#else
+ size_t y = (size_t) x;
+#endif
+ return y % tablesize;
+}
+
+static bool
+hash_compare_ints (void const *x, void const *y)
+{
+ return (x == y) ? true : false;
+}
+
+// =================================================================================
+
+
+
/* If nonzero, this is the index of the first field in a range that goes
to end of line. */
static size_t eol_range_start;
@@ -239,13 +318,25 @@ static inline bool
is_range_start_index (size_t k)
{
bool is_start = false;
-
+
+ /*
+ * Old version implementation is
+ * return hash_lookup (range_start_ht, (void *) i) ? true : false;
+ */
+
+ // New version implementation, moved to the return statement
+ // because current_rp is only used in the new version
+ /*
if (!complement)
is_start = (k == eol_range_start || k == current_rp->lo);
else
is_start = (k == (current_rp - 1)->hi + 1);
-
+
return is_start;
+ */
+
+ return change(hash_lookup (range_start_ht, (void *) k) ? true : false,
+ !complement ? (k == eol_range_start || k == current_rp->lo) : (k == (current_rp - 1)->hi + 1));
}
/* Return nonzero if the K'th field or byte is printable. */
@@ -256,14 +347,14 @@ print_kth (size_t k, bool *range_start)
bool k_selected = false;
if (0 < eol_range_start && eol_range_start <= k)
k_selected = true;
- else if (current_rp->lo <= k && k <= current_rp->hi)
+ else if (change(k <= max_range_endpoint && is_printable_field (k), current_rp->lo <= k && k <= current_rp->hi))
k_selected = true;
bool is_selected = k_selected ^ complement;
if (range_start && is_selected)
*range_start = is_range_start_index (k);
- return k_selected ^ complement;
+ return change(is_selected, k_selected ^ complement);
}
/* Comparison function for qsort to order the list of
@@ -309,6 +400,201 @@ set_fields (const char *fieldstr)
size_t i;
bool in_digits = false;
+
+
+ if (change(1, 0)) {
+ // Taken from old version ============================================
+ struct range_pair *rp = NULL;
+ size_t n_rp = 0;
+ size_t n_rp_allocated = 0;
+
+ /* Collect and store in RP the range end points.
+ It also sets EOL_RANGE_START if appropriate. */
+
+ while (true)
+ {
+ if (*fieldstr == '-')
+ {
+ in_digits = false;
+ /* Starting a range. */
+ if (dash_found)
+ FATAL_ERROR (_("invalid byte, character or field list"));
+ dash_found = true;
+ fieldstr++;
+
+ if (lhs_specified && !value)
+ FATAL_ERROR (_("fields and positions are numbered from 1"));
+
+ initial = (lhs_specified ? value : 1);
+ value = 0;
+ }
+ else if (*fieldstr == ','
+ || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
+ {
+ in_digits = false;
+ /* Ending the string, or this field/byte sublist. */
+ if (dash_found)
+ {
+ dash_found = false;
+
+ if (!lhs_specified && !rhs_specified)
+ FATAL_ERROR (_("invalid range with no endpoint: -"));
+
+ /* A range. Possibilities: -n, m-n, n-.
+ In any case, 'initial' contains the start of the range. */
+ if (!rhs_specified)
+ {
+ /* 'n-'. From 'initial' to end of line. If we've already
+ seen an M- range, ignore subsequent N- unless N < M. */
+ if (eol_range_start == 0 || initial < eol_range_start)
+ eol_range_start = initial;
+ field_found = true;
+ }
+ else
+ {
+ /* 'm-n' or '-n' (1-n). */
+ if (value < initial)
+ FATAL_ERROR (_("invalid decreasing range"));
+
+ /* Is there already a range going to end of line? */
+ if (eol_range_start != 0)
+ {
+ /* Yes. Is the new sequence already contained
+ in the old one? If so, no processing is
+ necessary. */
+ if (initial < eol_range_start)
+ {
+ /* No, the new sequence starts before the
+ old. Does the old range going to end of line
+ extend into the new range? */
+ if (eol_range_start <= value)
+ {
+ /* Yes. Simply move the end of line marker. */
+ eol_range_start = initial;
+ }
+ else
+ {
+ /* No. A simple range, before and disjoint from
+ the range going to end of line. Fill it. */
+ ADD_RANGE_PAIR (rp, initial, value);
+ }
+
+ /* In any case, some fields were selected. */
+ field_found = true;
+ }
+ }
+ else
+ {
+ /* There is no range going to end of line. */
+ ADD_RANGE_PAIR (rp, initial, value);
+ field_found = true;
+ }
+ value = 0;
+ }
+ }
+ else
+ {
+ /* A simple field number, not a range. */
+ ADD_RANGE_PAIR (rp, value, value);
+ value = 0;
+ field_found = true;
+ }
+
+ if (*fieldstr == '\0')
+ {
+ break;
+ }
+
+ fieldstr++;
+ lhs_specified = false;
+ rhs_specified = false;
+ }
+ else if (ISDIGIT (*fieldstr))
+ {
+ /* Record beginning of digit string, in case we have to
+ complain about it. */
+ static char const *num_start;
+ if (!in_digits || !num_start)
+ num_start = fieldstr;
+ in_digits = true;
+
+ if (dash_found)
+ rhs_specified = 1;
+ else
+ lhs_specified = 1;
+
+ /* Detect overflow. */
+ if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t))
+ {
+ /* In case the user specified -c$(echo 2^64|bc),22,
+ complain only about the first number. */
+ /* Determine the length of the offending number. */
+ size_t len = strspn (num_start, "0123456789");
+ char *bad_num = xstrndup (num_start, len);
+ if (operating_mode == byte_mode)
+ error (0, 0,
+ _("byte offset %s is too large"), quote (bad_num));
+ else
+ error (0, 0,
+ _("field number %s is too large"), quote (bad_num));
+ free (bad_num);
+ exit (EXIT_FAILURE);
+ }
+
+ fieldstr++;
+ }
+ else
+ FATAL_ERROR (_("invalid byte, character or field list"));
+ }
+
+ max_range_endpoint = 0;
+ for (i = 0; i < n_rp; i++)
+ {
+ if (rp[i].hi > max_range_endpoint)
+ max_range_endpoint = rp[i].hi;
+ }
+
+ /* Allocate an array large enough so that it may be indexed by
+ the field numbers corresponding to all finite ranges
+ (i.e. '2-6' or '-4', but not '5-') in FIELDSTR. */
+
+ if (max_range_endpoint)
+ printable_field = xzalloc (max_range_endpoint / CHAR_BIT + 1);
+
+ qsort (rp, n_rp, sizeof (rp[0]), compare_ranges);
+
+ /* Set the array entries corresponding to integers in the ranges of RP. */
+ for (i = 0; i < n_rp; i++)
+ {
+ /* Ignore any range that is subsumed by the to-EOL range. */
+ if (eol_range_start && eol_range_start <= rp[i].lo)
+ continue;
+
+ /* Record the range-start indices, i.e., record each start
+ index that is not part of any other (lo..hi] range. */
+ size_t rsi_candidate = complement ? rp[i].hi + 1 : rp[i].lo;
+ if (output_delimiter_specified
+ && !is_printable_field (rsi_candidate))
+ mark_range_start (rsi_candidate);
+
+ for (size_t j = rp[i].lo; j <= rp[i].hi; j++)
+ mark_printable_field (j);
+ }
+
+ if (output_delimiter_specified
+ && !complement
+ && eol_range_start
+ && max_range_endpoint
+ && (max_range_endpoint < eol_range_start
+ || !is_printable_field (eol_range_start)))
+ mark_range_start (eol_range_start);
+
+ free (rp);
+
+
+
+ } else {
+ // New version ============================================
/* Collect and store in RP the range end points.
It also sets EOL_RANGE_START if appropriate. */
@@ -452,6 +738,8 @@ set_fields (const char *fieldstr)
++n_rp;
rp = xrealloc (rp, n_rp * sizeof (struct range_pair));
rp[n_rp - 1].lo = rp[n_rp - 1].hi = 0;
+
+ } // end of new version
return field_found;
}
@@ -468,7 +756,7 @@ cut_bytes (FILE *stream)
byte_idx = 0;
print_delimiter = false;
- current_rp = rp;
+ current_rp = change(current_rp, rp);
while (true)
{
int c; /* Each character from the file. */
@@ -480,7 +768,7 @@ cut_bytes (FILE *stream)
putchar ('\n');
byte_idx = 0;
print_delimiter = false;
- current_rp = rp;
+ current_rp = change(current_rp, rp);
}
else if (c == EOF)
{
@@ -490,10 +778,12 @@ cut_bytes (FILE *stream)
}
else
{
- next_item (&byte_idx);
+ if (change(0, 1)) {
+ next_item (&byte_idx);
+ }
bool range_start;
bool *rs = output_delimiter_specified ? &range_start : NULL;
- if (print_kth (byte_idx, rs))
+ if (print_kth (change(++byte_idx, byte_idx), rs))
{
if (rs && *rs && print_delimiter)
{
@@ -517,7 +807,7 @@ cut_fields (FILE *stream)
bool found_any_selected_field = false;
bool buffer_first_field;
- current_rp = rp;
+ current_rp = change(current_rp, rp);
c = getc (stream);
if (c == EOF)
@@ -584,7 +874,12 @@ cut_fields (FILE *stream)
fwrite (field_1_buffer, sizeof (char), n_bytes - 1, stdout);
found_any_selected_field = true;
}
- next_item (&field_idx);
+ if (change(0, 1)) {
+ next_item (&field_idx);
+ } else {
+ // From old version
+ ++field_idx;
+ }
}
int prev_c = c;
@@ -623,11 +918,16 @@ cut_fields (FILE *stream)
if (c == EOF)
break;
field_idx = 1;
- current_rp = rp;
+ current_rp = change(current_rp, rp);
found_any_selected_field = false;
}
- else if (c == delim)
- next_item (&field_idx);
+ else if (c == delim) {
+ if (change(0, 1)) {
+ next_item (&field_idx);
+ } else {
+ field_idx++;
+ }
+ }
}
}
@@ -775,6 +1075,17 @@ main (int argc, char **argv)
if (suppress_non_delimited && operating_mode != field_mode)
FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
\tonly when operating on fields"));
+
+ if (change(output_delimiter_specified, 0))
+ {
+ range_start_ht = hash_initialize (HT_RANGE_START_INDEX_INITIAL_CAPACITY,
+ NULL, hash_int,
+ hash_compare_ints, NULL);
+ if (range_start_ht == NULL)
+ xalloc_die ();
+
+ }
+
if (! set_fields (spec_list_string))
{
@@ -802,6 +1113,10 @@ main (int argc, char **argv)
for (ok = true; optind < argc; optind++)
ok &= cut_file (argv[optind]);
+
+ if (change(range_start_ht, 0))
+ hash_free (range_start_ht);
+
if (have_read_stdin && fclose (stdin) == EOF)
{