/*****************************************************************************

	unsort - reorder files semi-randomly
	Copyright (C) 2007, 2008  Wessel Dankers <wsl@fruit.je>

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.

	$Id: unsort.c 1324 2008-06-07 20:38:28Z wsl $
	$URL: http://rot.zo.oi/svn/wsl/src/unsort/unsort.c $

*****************************************************************************/

#include <stdbool.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
#include <limits.h>
#include <fcntl.h>
#include <sys/uio.h>

#include "error.h"
#include "filebuf.h"
#include "iovec.h"
#include "shuffle.h"
#include "merge.h"
#include "mt19937ar.h"
#include "mt19937ar_init.h"

/* Not every platform defines O_LARGEFILE; fall back to 0 so that OR-ing it
   into the open() flags is a no-op there. */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif

/* Version string printed by --version. Only used when VERSION has not been
   defined already (presumably by the build system). */
#ifndef VERSION
#define VERSION "$Id: unsort.c 1324 2008-06-07 20:38:28Z wsl $"
#endif

/*
 * Command line option table for getopt_long().
 *
 * Every name literal carries two NUL-separated strings: first the long
 * option name proper (which is where any C string function stops), then
 * the help text that usage() prints right behind it. The two parts are
 * written as adjacent literals here; the compiler joins them.
 *
 * Only --seed takes an argument. No entry uses the flag pointer, so
 * getopt_long() returns the short option character in the last column.
 */
static const struct option long_options[] = {
	{"help\0"            "             Print this message to stdout", no_argument, NULL, 'h'},
	{"version\0"         "          Print the program version", no_argument, NULL, 'v'},
	{"random\0"          "           Use a random permutation", no_argument, NULL, 'r'},
	{"heuristic\0"       "        Use a heuristic permutation (default)", no_argument, NULL, 'p'},
	{"identity\0"        "         Do not change the order of lines", no_argument, NULL, 'n'},
	{"concatenate\0"     "      Concatenate input before shuffling", no_argument, NULL, 'c'},
	{"merge\0"           "            Merge input after shuffling in given order", no_argument, NULL, 'm'},
	{"merge-random\0"    "     Merge input after shuffling (default)", no_argument, NULL, 'M'},
	{"seed\0"            " <integer>   Seed the permutation", required_argument, NULL, 's'},
	{"zero-terminated\0" "  Use \\0 line endings", no_argument, NULL, 'z'},
	{"null\0"            "             Use \\0 line endings", no_argument, NULL, '0'},
	{"linefeed\0"        "         Use \\n line endings (default)", no_argument, NULL, 'l'},
	{NULL, 0, NULL, 0}
};

/*
 * Print a usage summary to fh.
 *
 * The first line lists every short option that takes no argument inside one
 * "[-...]" group, followed by the fixed "-s <integer>" and file operands.
 * After that comes one line per entry of long_options: short form, long
 * form and the help text stored behind the name's terminating NUL.
 *
 * fh        stream to write to
 * progname  program name shown after "Usage:"
 */
static void usage(FILE *fh, const char *progname) {
	const struct option *opt;

	fprintf(fh, "Usage: %s [-", progname);
	for(opt = long_options; opt->name; opt++) {
		/* skip options that need an argument or have no short form */
		if(opt->has_arg || !opt->val)
			continue;
		fputc(opt->val, fh);
	}
	fputs("] [-s <integer>] [file...]\n", fh);

	for(opt = long_options; opt->name; opt++) {
		/* the description starts right after the name's NUL byte */
		const char *help = opt->name + strlen(opt->name) + 1;

		fprintf(fh, "\t-%c, --%s%s\n", opt->val, opt->name, help);
	}
}

/*
 * Program entry point.
 *
 * Steps, in order:
 *   1. parse the command line options;
 *   2. seed the random number generator (from -s, else from /dev/urandom);
 *   3. set up one filebuf_t per input (the named files, "-" for stdin, or
 *      just stdin when no operands are given);
 *   4. run iovec_parse() once per input to count lines;
 *   5. compute the output order (per input after merge(), or over all
 *      lines at once with --concatenate);
 *   6. run iovec_parse() again to fill the iovec array and write it to
 *      stdout with writev_all().
 *
 * Returns 0 when the end is reached; this includes the cases where some
 * input could not be opened (a warning is printed for each) and where
 * there are no lines at all. Usage errors are reported through
 * exit_error()/exit_perror().
 */
int main(int argc, char **argv) {
	int i, fd, option_index;
	struct iovec *iov;
	uint32_t u, numfiles, count, chunk_count, chunk_start;
	uint32_t *tlb, *chunk_tlb;
	filebuf_t *fb, *ds, **dd;

	uint32_t seed = 0;
	bool manual_seed = false;
	bool multi = true;
	shuffle_algo_t shuffle_algo = shuffle_heuristic;
	shuffle_algo_t shuffle_files = shuffle_random;
	char *end;
	int sep = '\n';

	/* We print our own diagnostics; the leading ':' in the option string
	   makes getopt_long() return ':' for a missing option argument. */
	opterr = 0;
	/* getopt_long() signals the end of the options with -1 (not EOF). */
	while((i = getopt_long(argc, argv, ":hvrpncmMs:z0l", long_options, &option_index)) != -1) {
		switch(i) {
			case 'h':
				puts("unsort - reorder files semi-randomly");
				usage(stdout, *argv);
				exit(ERROR_NONE);
			case 'v':
				printf("unsort %s\ncopyright 2007, 2008 Wessel Dankers <wsl@fruit.je>\n", VERSION);
				exit(ERROR_NONE);
			case 'r':
				shuffle_algo = shuffle_random;
				break;
			case 'p':
				shuffle_algo = shuffle_heuristic;
				break;
			case 'n':
				shuffle_algo = shuffle_none;
				break;
			case 'c':
				multi = false;
				break;
			case 'm':
				multi = true;
				shuffle_files = shuffle_none;
				break;
			case 'M':
				multi = true;
				shuffle_files = shuffle_random;
				break;
			case 's':
				if(optarg && *optarg) {
					/* NOTE(review): where unsigned long is wider than
					   32 bits, larger values are silently narrowed by
					   the assignment to the uint32_t seed. */
					errno = 0;
					seed = strtoul(optarg, &end, 0);
					if(errno)
						exit_perror(ERROR_USER, "Can't parse seed '%s' as an unsigned integer", optarg);
					if(end && *end)
						exit_error(ERROR_USER, "Can't parse seed '%s' as an unsigned integer", optarg);
					manual_seed = true;
				} else {
					/* an empty seed argument reverts to a random seed */
					seed = UINT32_C(0);
					manual_seed = false;
				}
				break;
			case '0':
			case 'z':
				sep = '\0';
				break;
			case 'l':
				sep = '\n';
				break;
			/* NOTE(review): the three cases below have no break and so
			   rely on exit_error() not returning. */
			case '?':
				usage(stderr, *argv);
				/* An unrecognised long option has no option character;
				   optopt is 0 in that case (glibc behaviour), so name
				   the offending argument instead of printing a NUL. */
				if(optopt)
					exit_error(ERROR_USER, "Unknown option: -%c", optopt);
				else
					exit_error(ERROR_USER, "Unknown option: %s", argv[optind - 1]);
			case ':':
				usage(stderr, *argv);
				exit_error(ERROR_USER, "Option -%c requires an argument", optopt);
			default:
				usage(stderr, *argv);
				exit_error(ERROR_INTERNAL, "Unknown option: -%c", i);
		}
	}

	/* Without file operands there is exactly one input: stdin. */
	if(argc > optind)
		numfiles = argc - optind;
	else
		numfiles = 1;

	if(manual_seed) {
		mt_seed(seed);
	} else {
		if(!mt_init_urandom())
			exit_perror(ERROR_SYSTEM, "Can't read from /dev/urandom");
		seed = mt_genrand32();
	}
	shuffle_seed(seed);

	dd = xalloc(numfiles * sizeof *dd);
	ds = xalloc(numfiles * sizeof *ds);
	/* The not yet initialised filebuf array doubles as scratch space for
	   the file order table; every tlb[] entry is read into dd[] before
	   any filebuf is written. */
	tlb = (uint32_t *)ds;

	/* tlb[u] is used as an index into ds, i.e. it decides which filebuf
	   slot the u-th input ends up in. */
	shuffle_files(NULL, tlb, numfiles);
	for(u = 0; u < numfiles; u++)
		dd[u] = ds + tlb[u];

	u = 0;
	if(argc > optind) {
		for(i = optind; i < argc; i++) {
			fb = dd[u++];
			*fb = filebuf_0;
			if(strcmp(argv[i], "-")) {
				fd = open(argv[i], O_RDONLY | O_LARGEFILE);
				if(fd == -1) {
					/* keep going; this slot stays equal to filebuf_0 */
					warn_perror("Can't open %s", argv[i]);
					continue;
				}
				filebuf_init(fb, fd);
				close(fd);
				fb->name = argv[i];
			} else {
				filebuf_init(fb, STDIN_FILENO);
			}
		}
	} else {
		/* Start from filebuf_0 exactly like the paths above do: the slot
		   still holds the scratch table, and fb->name is read below. */
		fb = *dd;
		*fb = filebuf_0;
		filebuf_init(fb, STDIN_FILENO);
	}

	/* First pass (no iovec/table given): a non-zero result is reported as
	   an unterminated last line. fb->count is read afterwards and summed
	   so that fb->start is the offset of this input's first line. */
	count = 0;
	for(u = 0; u < numfiles; u++) {
		fb = dd[u];
		if(iovec_parse(fb, sep, NULL, NULL)) {
			if(fb->name)
				warn_error("%s: missing linebreak at end of file – line skipped", fb->name);
			else
				warn_error("missing linebreak at end of input – line skipped");
		}
		fb->start = count;
		count += fb->count;
	}

	if(!count)
		return 0;

	tlb = xalloc(count * sizeof *tlb);
	iov = xalloc(count * sizeof *iov);

	/* The iovec array is still unused at this point, so its memory serves
	   as scratch: the first count uint32_t's as chunk_tlb, the area
	   behind them is handed to shuffle_tmp(). */
	chunk_tlb = (uint32_t *)iov;
	shuffle_tmp(chunk_tlb + count);

	if(multi) {
		merge(dd, numfiles, NULL, chunk_tlb);

		/* shuffle each input's range [start, start + count) separately */
		for(u = 0; u < numfiles; u++) {
			fb = dd[u];
			chunk_start = fb->start;
			chunk_count = fb->count;
			shuffle_algo(chunk_tlb + chunk_start, tlb + chunk_start, chunk_count);
		}
	} else {
		/* --concatenate: one shuffle across all lines of all inputs */
		shuffle_algo(NULL, tlb, count);
	}

	/* Second pass: this time iov and tlb are passed in (presumably each
	   line's iovec is stored at the position tlb assigns to it). */
	for(u = 0; u < numfiles; u++)
		iovec_parse(dd[u], sep, iov, tlb);

	writev_all(STDOUT_FILENO, iov, count);

	return 0;
}
