/* linecut -- output selected lines from files
   Copyright (C) 2007, 2008 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Steven Schubiger <stsc@members.fsf.org> */

#include <config.h>

#include <stdio.h>
#include <getopt.h>
#include <stdlib.h>
#include <stdbool.h>
#include <gettext.h>
#include <errno.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <locale.h>

#include "binary-io.h"
#include "closeout.h"
#include "error.h"
#include "full-write.h"
#include "quote.h"
#include "safe-read.h"
#include "xalloc.h"

/* Shortcut for gettext(): marks MSGID as translatable and looks up its
   translation at run time.  */
#define _(msgid) gettext (msgid)

/* Clearer version of strcmp(): true if the strings A and B are equal.  */
#define STREQ(a, b) (strcmp (a, b) == 0)

/* ISDIGIT differs from isdigit, as follows:
   - Its arg may be any int or unsigned int; it need not be an unsigned char
     or EOF.
   - It's typically faster.
   POSIX says that only '0' through '9' are digits.  Prefer ISDIGIT to
   isdigit unless it's important to use the locale's definition
   of `digit' even when the host does not conform to POSIX.  */
#define ISDIGIT(c) ((unsigned int) (c) - '0' <= 9)

/* True if the character RANGE may be part of a range component, i.e. it is
   a decimal digit or one of the sign prefixes `-' and `+'.
   RANGE is evaluated more than once, so it must not have side effects.  */
#define VALID_RANGE(range) (ISDIGIT (range) || (range) == '-' || (range) == '+')

/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "linecut"

/* The version of this program.  This is a floating-point constant;
   version() prints it with the "%.1f" conversion.  */
#define PROGRAM_VERSION 0.5

/* The authors of this program.  */
#define AUTHORS "Steven Schubiger"

/* The address to send bug-reports to.  */
#define BUGS_MAIL_ADDRESS "stsc@members.fsf.org"

/* Maximum number of range sets (START:END pairs) that may be given.
   The `ranges' array holds two entries per set.  */
#define MAX_RANGE_SETS 16

/* If true, the range was given through the -r/--range option; otherwise
   main() takes it from the first non-option argument.  */
static bool range_mode;

/* If true, number output lines.  */
static bool number_mode;

/* If true, print filename headers.  */
static bool print_headers;

/* When to print the filename banners:
   multiple_files - only if more than one file operand was given (default),
   always         - for every file (-v),
   never          - for no file (-q).  */
enum header_mode
{
  multiple_files, always, never
};

/* Are we reading from standard input?  Set per file by range_file().  */
static bool is_stdin;

/* Have we ever read standard input?  Once set it stays set, so that
   main() knows to close standard input before exiting.  */
static bool have_read_stdin;

/* The name this program was run with.  */
static char *program_name;

/* While parsing, the index of the next free slot in `ranges'.  After
   parse_range_sets() has returned, the index of the last used slot.  */
static int range_max;

/* The range sets, flattened: ranges[2*k] is the starting line and
   ranges[2*k + 1] the ending line of set k.  Negative values count
   from the end of the input.  */
static long ranges[MAX_RANGE_SETS * 2];

/* For each range set, the byte offset in the input at which its starting
   line begins.  Filled by determine_seek_offsets(), used by
   range_lines().  */
static off_t offsets[MAX_RANGE_SETS];

/* How many lines the stream consists of, as counted (by newline
   characters) in count_lines_in_stream().  */
static uintmax_t lines;

/* The input currently being processed.  `fd' is the descriptor all
   reading and seeking is done on.  `fh' is only set when standard input
   was buffered into a temporary file, and is used to close that file.  */
static struct 
  {
    int fd;
    FILE *fh;
  } 
input;

/* Buffer for line numbers.
   The counter is kept as right-aligned decimal text, padded with blanks
   on the left and followed by a tab and a terminating NUL, so that it can
   be written out as a string without any conversion.
   An 11 digit counter may overflow within an hour on a P2/466,
   an 18 digit counter needs about 1000y */
#define LINE_COUNTER_BUF_LEN 20
static char line_buf[LINE_COUNTER_BUF_LEN] =
  {
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
    ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0',
    '\t', '\0'
  };

/* Position in `line_buf' where printing starts.  This will not change
   unless the number of lines is larger than 999999.  */
static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8;

/* Position of the first (most significant) digit in `line_buf'.
   next_line_num() moves it to the left whenever the counter gains a
   digit.  */
static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Position of the last (least significant) digit in `line_buf'.  */
static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;

/* Long options understood by getopt_long() in main().  Each entry maps
   to the short option character that main() handles; --quiet and
   --silent are synonyms.  The all-zero entry terminates the table.  */
static struct option const long_options[] =
{
  {"number", no_argument, NULL, 'n'},
  {"range", required_argument, NULL, 'r'},
  {"quiet", no_argument, NULL, 'q'},
  {"silent", no_argument, NULL, 'q'},
  {"verbose", no_argument, NULL, 'v'},
  {"help", no_argument, NULL, 'h' },
  {"version", no_argument, NULL, 'V' },
  {NULL, 0, NULL, 0}
};

/* Print usage information and terminate the program with STATUS.
   For a failure STATUS only a short hint is written to standard error;
   otherwise the full help text goes to standard output.  */
static void
usage (int status)
{
  if (status != EXIT_SUCCESS)
    {
      fprintf (stderr, _("Try `%s --help' for more information.\n"),
               program_name);
      exit (status);
    }

  /* Synopsis and general description.  */
  printf (_("\
Usage: %s [OPTION]... [-r/--range] RANGE [FILE]...\n\
"), program_name);
  fputs (_("\
Print selected lines of each FILE to standard output.\n\
With more than one FILE, precede the output of each file with a header\n\
giving its file name. With no FILE, or when FILE is -, read standard input.\n\
\n\
"), stdout);
  fputs (_("\
Mandatory arguments to long options are also mandatory for short options.\n\
"), stdout);

  /* Option summary.  */
  fputs (_("\
  -n, --number             number all output lines\n\
  -r, --range=RANGE        selected lines to be output\n\
"), stdout);
  fputs (_("\
  -q, --quiet, --silent    never print headers giving file names\n\
  -v, --verbose            always print headers giving file names\n\
"), stdout);
  fputs (_("\
  --help                   display this help and exit\n\
  --version                output version information and exit\n\
"), stdout);

  /* Description of the RANGE syntax, followed by examples.  */
  fputs (_("\
\n\
RANGE must conform to following format:\n\
LINE_START:LINE_END, ...\n\
LINE_START or LINE_END may be an absolute line position or relative-to-EOF one.\n\
\n\
An absolute line position is denoted by a bare number (i.e., no prefix).\n\
A relative-to-EOF line position is denoted by a number with a minus sign as\n\
prefix. LINE_END may have a plus sign as prefix which denotes advancement of\n\
lines beginning from LINE_START.\n\
\n\
If exactly one incomplete set is provided (with a separating colon) then certain\n\
defaults will be assumed: If LINE_START is missing it will default to start\n\
from first line. If LINE_END is missing it will default to end at last line.\n\
"), stdout);
  fputs (_("\
\n\
Examples: \"1:-1\", \"5:10\", \"-10:-2\", \"1:2,5:6,-5:-1\", \":-3\", \"6:\", \"1:+3\"\n\
"), stdout);

  printf (_("\
\n\
Report bugs to <%s>.\n\
"), BUGS_MAIL_ADDRESS);

  exit (status);
}

/* Print the program name and version, the license notice and the
   authors to standard output, then exit successfully.  */
static void
version (void)
{
  const char *name = PROGRAM_NAME;
  const char *written_by = AUTHORS;
  double release = PROGRAM_VERSION;

  /* "NAME VERSION" line.  */
  printf (_("\
%s %.1f\n\
"), name, release);

  /* Copyright and license notice.  */
  fputs (_("\
Copyright (C) 2007, 2008 Free Software Foundation, Inc.\n\
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
This is free software: you are free to change and redistribute it.\n\
There is NO WARRANTY, to the extent permitted by law.\n\
"), stdout);

  /* Credits.  */
  printf (_("\
\n\
Written by %s.\n\
"), written_by);

  exit (EXIT_SUCCESS);
}

/* Print the "==> FILENAME <==" banner to standard output.  Every banner
   but the very first one is preceded by an empty line, which separates
   it from the output of the previous file.  */
static void
write_header (const char *filename)
{
  static bool banner_printed = false;
  const char *separator = banner_printed ? "\n" : "";

  printf ("%s==> %s <==\n", separator, filename);
  banner_printed = true;
}

/* Increment the decimal line counter that is kept as text in `line_buf'.
   The counter grows to the left when it gains a digit; once the buffer is
   exhausted its first character is set to `>' to flag the overflow.  */
static void
next_line_num (void)
{
  char *digit;

  /* Walk from the least significant digit to the left, propagating the
     carry.  As soon as a digit can be bumped without a carry we are
     done.  */
  for (digit = line_num_end; digit >= line_num_start; digit--)
    {
      if (*digit < '9')
        {
          ++*digit;
          return;
        }
      *digit = '0';
    }

  /* Every digit wrapped around to `0': the number needs one more digit
     in front, if there is still room for it.  */
  if (line_num_start <= line_buf)
    *line_buf = '>';
  else
    {
      line_num_start--;
      *line_num_start = '1';
    }

  /* Widen the printed field if the number has outgrown it.  */
  if (line_num_start < line_num_print)
    line_num_print--;
}

static bool
elide_lines_seekable (const char *filename, int fd,
                      uintmax_t line_from, uintmax_t line_to,
                      off_t offset)
{
  char buffer[BUFSIZ];
  char *buf, *eol;
  bool seen_complete_line;
  uintmax_t line_current;

  seen_complete_line = true;

  if ((lseek (fd, offset, SEEK_SET)) < 0)
    {
      error (0, errno, _("cannot lseek %s"), quote (filename));
      return false;
    }
  line_current = line_from;
  while (1)
    {
      size_t bytes_read = safe_read (fd, buffer, BUFSIZ);
      size_t bytes_to_write;

      if (bytes_read == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          return false;
        }
      if (bytes_read == 0)
        break;
      buf = buffer;
      bytes_to_write = bytes_read;
      for (; line_current <= line_to; line_current++)
        {
          if (bytes_to_write)
            {
              /* Full line with trailing newline detected. */
              if (eol = memchr (buf, '\n', bytes_to_write))
                {
                  if (number_mode && seen_complete_line)
                    {
                      if (full_write (STDOUT_FILENO, line_num_print, strlen (line_num_print)) 
                          != strlen (line_num_print))
                        {
                          error (0, errno, _("write error"));
                          return false;
                        }
                      next_line_num ();
                    }
                  seen_complete_line = true;

                  if (full_write (STDOUT_FILENO, buf, eol - buf) != eol - buf)
                    {
                      error (0, errno, _("write error"));
                      return false;
                    }
                  if (full_write (STDOUT_FILENO, "\n", 1) != 1)
                    {
                      error (0, errno, _("write error"));
                      return false;
                    }

                  bytes_to_write -= eol - buf + 1; /* + 1 for subtracting the newline too. */
                  buf = eol + 1; /* + 1 because we advance beyond the end-of-line. */
                }
              /* Part of line with no trailing newline. */
              else
                {
                  if (number_mode)
                    {
                      if (full_write (STDOUT_FILENO, line_num_print, strlen (line_num_print))
                          != strlen (line_num_print))
                        {
                          error (0, errno, _("write error"));
                          return false;
                        }
                      next_line_num ();
                    }
                  if (full_write (STDOUT_FILENO, buf, bytes_to_write) != bytes_to_write)
                    {
                      error (0, errno, _("write error"));
                      return false;
                    }
                  seen_complete_line = false;
                  break;
               }
            }
          else
            break;
        }
    }
  return true;
}

/* Turn the relative-to-EOF line position *RANGE_POS into an absolute one,
   based on the line count in `lines': -1 denotes the last line, -2 the
   one before it, and so on.  Non-negative positions are left
   untouched.  */
static void
relative_range_to_absolute (long *range_pos) 
{
  if (*range_pos >= 0)
    return;

  *range_pos += lines + 1;
}

static bool
range_lines (const char *filename, int fd)
{
  bool ok;
  int c, i;
  long line_from, line_to;
  off_t offset;

  ok = true;

  if (print_headers)
    write_header (filename);

  for (c = 0, i = 0; i <= range_max; c++, i += 2)
    {
      line_from = ranges[i];
      line_to = ranges[i+1];

      relative_range_to_absolute (&line_from);
      relative_range_to_absolute (&line_to);

      offset = offsets[c];

      ok &= elide_lines_seekable (filename, fd, line_from, line_to, offset);
    }
  return ok;
}

static bool
determine_seek_offsets (const char *filename, int fd)
{
  char buffer[BUFSIZ];
  int c, i;
  long line_iter, range_abs;
  size_t bytes_offset;
  off_t seek_offset;

  c = 0;
  i = 0;
  bytes_offset = 0;
  line_iter = 0;
  seek_offset = 0;

  if ((lseek (fd, 0, SEEK_SET)) < 0)
    {
      error (0, errno, _("cannot lseek %s"), quote (filename));
      return false;
    }
  while (1)
    {
      size_t bytes_read = safe_read (fd, buffer, BUFSIZ);
      size_t bytes_to_iter = 0;

      if (bytes_read == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          return false;
        }
      if (bytes_read == 0)
        break;
      while (bytes_to_iter < bytes_read)
        {
          if (buffer[bytes_to_iter] == '\n')
            {
              line_iter++;
              break;
            }
          bytes_to_iter++;
        }
      bytes_offset += bytes_to_iter + 1;

      range_abs = ranges[i];
      relative_range_to_absolute (&range_abs);
      if (line_iter == range_abs)
        {
          offsets[c++] = seek_offset;
          i += 2;
        }
      /* We do not need to determine seek offsets past the last starting 
         range line. This will mostly avoid a tremendous speed impact,
         when input is huge.  */
      if (i > range_max)
        break;
      if ((seek_offset = lseek (fd, bytes_offset, SEEK_SET)) < 0)
        {
          error (0, errno, _("cannot lseek %s"), quote (filename));
          return false;
        }
    }
  return true;
}

/* Count the newline characters of the seekable input FD, starting at its
   beginning, and store the result in `lines'.  FILENAME is only used in
   diagnostics.
   Return true on success.  On a seek or read error a diagnostic is
   issued, `lines' is left unchanged and false is returned.  */
static bool
count_lines_in_stream (const char *filename, int fd)
{
  char buffer[BUFSIZ];
  uintmax_t newlines = 0;

  if (lseek (fd, 0, SEEK_SET) < 0)
    {
      error (0, errno, _("cannot lseek %s"), quote (filename));
      return false;
    }

  for (;;)
    {
      const char *scan, *end;
      size_t bytes_read = safe_read (fd, buffer, BUFSIZ);

      if (bytes_read == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          return false;
        }
      if (bytes_read == 0)
        break;

      /* Hop from newline to newline within the bytes just read.  */
      end = buffer + bytes_read;
      scan = buffer;
      while ((scan = memchr (scan, '\n', end - scan)) != NULL)
        {
          newlines++;
          scan++;
        }
    }

  lines = newlines;
  return true;
}

/* Check the parsed range sets against the input FD (named FILENAME in
   diagnostics).  The lines of the input are counted first; after that
   every set must have non-zero bounds, must not start before or on the
   ending line of the previous set, must not end before it starts and
   must lie within the input.  Any violation is a fatal error.
   Return whether counting the lines succeeded.  */
static bool
validate_ranges (const char *filename, int fd)
{
  bool counted = count_lines_in_stream (filename, fd);
  long previous_end = 0;
  int slot;

  for (slot = 0; slot <= range_max; slot += 2) 
    {
      long first = ranges[slot];
      long last = ranges[slot + 1];

      /* Zero is neither a valid absolute nor a valid relative position.  */
      if (first == 0)
        error (EXIT_FAILURE, 0, _("starting line must not be 0"));
      if (last == 0)
        error (EXIT_FAILURE, 0, _("ending line must not be 0"));

      relative_range_to_absolute (&first);
      relative_range_to_absolute (&last);

      /* Sets have to be given in ascending order, without overlap.  */
      if (first <= previous_end)
        error (EXIT_FAILURE, 0, _("starting line overlaps with previous ending one"));
      if (first > last)
        error (EXIT_FAILURE, 0, _("starting line must preceed the ending line"));

      /* Both bounds must exist in the input.  */
      if (first > lines)
        error (EXIT_FAILURE, 0, _("starting line exceeds total lines of %s"), quote (filename));
      if (last > lines)
        error (EXIT_FAILURE, 0, _("ending line exceeds total lines of %s"), quote (filename));

      previous_end = last;
    }
  return counted;
}

static void
exit_on_zero_read (const char *filename, int fd)
{
  char buffer[BUFSIZ];
  size_t bytes_read = safe_read (fd, buffer, BUFSIZ);

  if (!bytes_read)
    {
      if (have_read_stdin)
        error (EXIT_FAILURE, 0, _("zero bytes read from input pipe"));
      else
        error (EXIT_FAILURE, 0, _("zero bytes read from file %s"), quote (filename));
    }
}

/* Copy everything that can be read from FD (named FILENAME in
   diagnostics) into a newly created temporary file, so that input which
   cannot be seeked, such as a pipe, can be processed like a regular file.

   On success the temporary file is rewound, its descriptor and stream
   are stored in `input' and true is returned; the caller is responsible
   for closing `input.fh'.  On failure a diagnostic is issued, the
   temporary file is closed again, `input' is left untouched and false is
   returned.  */
static bool
buffer_stdin_to_tmpfile (const char *filename, int fd)
{
  char buffer[BUFSIZ];
  FILE *tempfile = tmpfile ();

  if (tempfile == NULL)
    {
      error (0, errno, _("cannot create temporary file"));
      return false;
    }

  for (;;)
    {
      size_t bytes_read = safe_read (fd, buffer, sizeof buffer);

      if (bytes_read == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          goto fail;
        }
      if (bytes_read == 0)
        break;
      if (full_write (fileno (tempfile), buffer, bytes_read) != bytes_read)
        {
          error (0, errno, _("write error"));
          goto fail;
        }
    }

  input.fd = fileno (tempfile);
  input.fh = tempfile;
  rewind (tempfile);
  return true;

 fail:
  /* The failure has been reported already; just release the stream, which
     also removes the temporary file.  */
  fclose (tempfile);
  return false;
}

/* Process the file operand FILENAME: open it, validate the requested
   ranges against it and write the selected lines to standard output.
   The operand "-" denotes standard input, which is copied to a temporary
   file first because the input needs to be seekable.

   Return true on success, false if any step failed (a diagnostic has
   been issued then).  Empty input and invalid ranges are fatal and make
   the program exit.  */
static bool
range_file (const char *filename)
{
  int fd;
  bool ok = true;
  is_stdin = STREQ (filename, "-");

  if (is_stdin)
    {
      have_read_stdin = true;
      fd = STDIN_FILENO;
      filename = _("standard input");
      if (O_BINARY && ! isatty (STDIN_FILENO))
        freopen (NULL, "rb", stdin);

      /* Without the temporary file there is nothing to work on, and
         `input' does not refer to it.  Stop here instead of operating
         on, and closing, whatever `input' held before.  */
      if (!buffer_stdin_to_tmpfile (filename, fd))
        return false;
    }
  else
    {
      fd = open (filename, O_RDONLY | O_BINARY);
      if (fd < 0)
        {
          error (0, errno, _("cannot open %s for reading"), quote (filename));
          return false;
        }
      input.fd = fd;
    }

  exit_on_zero_read (filename, input.fd);

  /* Each stage repositions the descriptor itself.  */
  ok &= validate_ranges (filename, input.fd);
  ok &= determine_seek_offsets (filename, input.fd);
  ok &= range_lines (filename, input.fd);

  if (is_stdin && fclose (input.fh) != 0)
    {
      error (0, errno, _("cannot close temporary file"));
      return false;
    }

  if (!is_stdin && close (fd) != 0)
    {
      error (0, errno, _("closing %s"), quote (filename));
      return false;
    }
  return ok;
}

void
parse_range_sets (char *ranges_arg)
{
  char *buf, *buf_orig, *range_digits, *range_iter;
  bool insert_range_at_end, seen_range_sep;
  int digits_count, digit_increment, ranges_seen, sets_seen;

  insert_range_at_end = false;
  seen_range_sep = false;

  sets_seen = 0;
  range_iter = ranges_arg;

  while (*range_iter)
    {
      if (*range_iter++ == ':')
        sets_seen++;
    }
  if (sets_seen == 1)
    {
      if (ranges_arg[0] == ':')
        {
          ranges[0] = 1;
          range_max++;
        }
      else if (ranges_arg[strlen(ranges_arg) - 1] == ':')
        insert_range_at_end = true;
    }
  ranges_seen = 0;
  while (*ranges_arg)
    {
      if (VALID_RANGE (*ranges_arg))
        {
          digits_count = 0;
          range_digits = ranges_arg;
          while (VALID_RANGE (*range_digits))
            {
              digits_count++;
              range_digits++;
            }
          digits_count++;
          buf = xmalloc (digits_count * sizeof (char));
          buf_orig = buf;
          while (VALID_RANGE (*ranges_arg))
            *buf++ = *ranges_arg++;
          *buf = '\0';
          buf = buf_orig;

          if (*buf == '+')
            {
              if ((range_max + 1) % 2 == 0)
                {
                  digit_increment = atoi (buf);
                  ranges[range_max] = ranges[range_max - 1] + digit_increment;
                }
              else
                {
                  free (buf); /* Would otherwise result in a leak, because a fatal error is being raised. */
                  error (EXIT_FAILURE, 0, _("+ prefix cannot be applied to starting lines"));
                }
            }
          else
            ranges[range_max] = atol (buf);
          range_max++;
          free (buf);

          if (range_max > (MAX_RANGE_SETS * 2))
            error (EXIT_FAILURE, 0, _("only %d sets permitted"), MAX_RANGE_SETS);

          ranges_seen++;
        }
      else
        {
          if (ranges_seen == 2)
            {
              if (*ranges_arg != ',')
                error (EXIT_FAILURE, 0, _("comma as set separator required"));
              ranges_seen = 0;
              seen_range_sep = false;
            }
          else if (ranges_seen == 1 && !seen_range_sep)
            {
              if (*ranges_arg != ':')
                error (EXIT_FAILURE, 0, _("colon as range separator required"));
              else
                seen_range_sep = true;
            }
          else
            {
              if (sets_seen != 1)
                {
                  if (!VALID_RANGE (*ranges_arg))
                    error (EXIT_FAILURE, 0, _("range must be number"));
                }
            }
          ranges_arg++;
        }
     }
  if (insert_range_at_end)
    ranges[range_max] = -1;
  else
    range_max--;
}

/* Entry point: parse the options, determine the range specification
   (from -r/--range or, failing that, the first non-option argument) and
   process every remaining file operand, or standard input if there is
   none.  The exit status is EXIT_SUCCESS only if all files were processed
   without error.  */
int
main(int argc, char **argv)
{
  char *ranges_arg = NULL;
  enum header_mode header_mode = multiple_files;
  bool ok = true;
  int c, i;

  /* Initializer for file_list if no file-arguments
     were specified on the command line.  */
  static char const *const default_file_list[] = {"-", NULL};
  char const *const *file_list;

  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  if (argc == 1)
    usage (EXIT_FAILURE);

  while ((c = getopt_long (argc, argv, "nqr:vhV", long_options, NULL))
          != -1)
    {
      switch(c)
        {
        case 'n':
          /* The line counter starts out as 0 and is advanced to 1 here.
             Do that only once, or a repeated -n would make the numbering
             start at 2.  */
          if (!number_mode)
            next_line_num();
          number_mode = true;
          break;

        case 'q':
          header_mode = never;
          break;

        case 'r':
          range_mode = true;
          ranges_arg = optarg;
          break;

        case 'v':
          header_mode = always;
          break;

        case 'h':
          usage (EXIT_SUCCESS);
          break;

        case 'V':
          version();
          break;

        default:
          usage (EXIT_FAILURE);
        }
    }

  /* Without -r the first non-option argument is the range.  */
  if (!range_mode)
    {
      if (optind < argc)
        {
          ranges_arg = argv[optind];
          optind++;
        }
      else
        usage (EXIT_FAILURE);
    }
  parse_range_sets (ranges_arg);

  file_list = (optind < argc
           ? (char const *const *) &argv[optind]
           : default_file_list);

  /* By default headers are printed only if there are at least two file
     operands.  */
  if (header_mode == always
      || (header_mode == multiple_files && optind < argc - 1))
    print_headers = true;

  if (O_BINARY && ! isatty (STDOUT_FILENO))
    freopen (NULL, "wb", stdout);

  for (i = 0; file_list[i]; i++)
    ok &= range_file (file_list[i]);

  if (have_read_stdin && close (STDIN_FILENO) < 0)
    error (EXIT_FAILURE, errno, "-");

  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}
