# HG changeset patch
# User ali <ali@juiblex.co.uk>
# Date 1369750639 -3600
# Node ID 1016349e619fe15277a41597cf9b8811c5411b25
# Parent  adb087007d082cc23390b68107f47584f937668e
Use GLib functions and data types

diff -r adb087007d08 -r 1016349e619f bl/textfileutils.c
--- a/bl/textfileutils.c	Mon May 27 09:03:04 2013 +0100
+++ b/bl/textfileutils.c	Tue May 28 15:17:19 2013 +0100
@@ -3,26 +3,21 @@
 #include <bl/bl.h>
 
 /*
- * Read a file into memory (which should be freed with mem_free when no
+ * Read a file into memory (which should be freed with g_free when no
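- * longer required). Returns NULL on error and outputs a suitable error
- * message to stderr.
+ * longer required). Returns FALSE on error; a failure to read the file
+ * is reported through err.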
  * DOS-style line endings and UTF-8 BOM are handled transparently even
  * on platforms which don't normally use these formats.
  */
 gboolean file_get_contents_text(const char *filename,char **contents,
-  size_t *length)
+  size_t *length,GError **err)
 {
     int i;
     unsigned char *raw;
-    size_t raw_length;
+    gsize raw_length;
     GString *string;
-    GError *error=NULL;
-    if (!g_file_get_contents(filename,(char *)&raw,&raw_length,&error))
-    {
-	fprintf(stderr,"%s: %s\n",filename,error->message);
-	g_error_free(error);
+    if (!g_file_get_contents(filename,(char **)&raw,&raw_length,err))
 	return FALSE;
-    }
     string=g_string_new(NULL);
     i=0;
     if (raw_length>=3 && raw[0]==0xEF && raw[1]==0xBB && raw[2]==0xBF)
diff -r adb087007d08 -r 1016349e619f bl/textfileutils.h
--- a/bl/textfileutils.h	Mon May 27 09:03:04 2013 +0100
+++ b/bl/textfileutils.h	Tue May 28 15:17:19 2013 +0100
@@ -4,6 +4,6 @@
 #include <glib.h>
 
 gboolean file_get_contents_text(const char *filename,char **contents,
-  size_t *length);
+  size_t *length,GError **err);
 
 #endif /* BL_TEXTFILEUTILS_H */
diff -r adb087007d08 -r 1016349e619f bookloupe/Makefile.am
--- a/bookloupe/Makefile.am	Mon May 27 09:03:04 2013 +0100
+++ b/bookloupe/Makefile.am	Tue May 28 15:17:19 2013 +0100
@@ -1,5 +1,9 @@
+INCLUDES=-I$(top_srcdir)
 bin_PROGRAMS=bookloupe
 pkgdata_DATA=bookloupe.typ
+AM_CFLAGS=$(GLIB_CFLAGS)
+LIBS=$(GLIB_LIBS)
+LDADD=../bl/libbl.la
 
 bookloupe.typ:	bookloupe.typ.in
 	sed 's/$$/\r/' $< > $@
diff -r adb087007d08 -r 1016349e619f bookloupe/bookloupe.c
--- a/bookloupe/bookloupe.c	Mon May 27 09:03:04 2013 +0100
+++ b/bookloupe/bookloupe.c	Tue May 28 15:17:19 2013 +0100
@@ -22,19 +22,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
+#include <glib.h>
+#include <bl/bl.h>
 
-#define MAXWORDLEN    80    /* max length of one word */
-#define LINEBUFSIZE 2048    /* buffer size for an input line */
-
-#define MAX_USER_TYPOS 1000
-#define USERTYPO_FILE "gutcheck.typ"
-
-#ifndef MAX_PATH
-#define MAX_PATH 16384
-#endif
-
-char aline[LINEBUFSIZE];
-char prevline[LINEBUFSIZE];
+gchar *prevline;
 
 /* Common typos. */
 char *typo[] = {
@@ -70,7 +61,7 @@
     "se", ""
 };
 
-char *usertypo[MAX_USER_TYPOS];
+GTree *usertypo;
 
 /* Common abbreviations and other OK words not to query as typos. */
 char *okword[] = {
@@ -282,46 +273,57 @@
 #define WAY_TOO_LONG      80
 #define SHORTEST_PG_LINE  55
 
-#define SWITCHES "ESTPXLOYHWVMUD" /* switches:- */
-				  /*     D - ignore DP-specific markup */
-				  /*     E - echo queried line */
-				  /*     S - check single quotes */
-				  /*     T - check common typos	*/
-				  /*     P - require closure of quotes on */
-				  /*	 every paragraph */
-				  /*     X - "Trust no one" :-) Paranoid! */
-				  /*	 Queries everything */
-				  /*     L - line end checking defaults on */
-				  /*	 -L turns it off */
-				  /*     O - overview. Just shows counts. */
-				  /*     Y - puts errors to stdout */
-				  /*	 instead of stderr */
-				  /*     H - Echoes header fields */
-				  /*     M - Ignore markup in < > */
-				  /*     U - Use file of User-defined Typos */
-				  /*     W - Defaults for use on Web upload */
-				  /*     V - Verbose - list EVERYTHING! */
-#define SWITNO 14		  /* max number of switch parms	*/
-				  /*	- used for defining array-size */
-#define MINARGS   1  /* minimum no of args excl switches */
-#define MAXARGS   1  /* maximum no of args excl switches */
+enum {
+    ECHO_SWITCH,
+    SQUOTE_SWITCH,
+    TYPO_SWITCH,
+    QPARA_SWITCH,
+    PARANOID_SWITCH,
+    LINE_END_SWITCH,
+    OVERVIEW_SWITCH,
+    STDOUT_SWITCH,
+    HEADER_SWITCH,
+    WEB_SWITCH,
+    VERBOSE_SWITCH,
+    MARKUP_SWITCH,
+    USERTYPO_SWITCH,
+    DP_SWITCH,
+    SWITNO
+};
 
-int pswit[SWITNO];   /* program switches set by SWITCHES */
+gboolean pswit[SWITNO];  /* program switches */
 
-#define ECHO_SWITCH      0
-#define SQUOTE_SWITCH    1
-#define TYPO_SWITCH      2
-#define QPARA_SWITCH     3
-#define PARANOID_SWITCH  4
-#define LINE_END_SWITCH  5
-#define OVERVIEW_SWITCH  6
-#define STDOUT_SWITCH    7
-#define HEADER_SWITCH    8
-#define WEB_SWITCH       9
-#define VERBOSE_SWITCH   10
-#define MARKUP_SWITCH    11
-#define USERTYPO_SWITCH  12
-#define DP_SWITCH	 13
+static GOptionEntry options[]={
+    { "dp", 'd', 0, G_OPTION_ARG_NONE, pswit+DP_SWITCH,
+      "Ignore DP-specific markup", NULL },
+    { "noecho", 'e', 0, G_OPTION_ARG_NONE, pswit+ECHO_SWITCH,
+      "Don't echo queried line", NULL },
+    { "squote", 's', 0, G_OPTION_ARG_NONE, pswit+SQUOTE_SWITCH,
+      "Check single quotes", NULL },
+    { "typo", 't', 0, G_OPTION_ARG_NONE, pswit+TYPO_SWITCH,
+      "Check common typos", NULL },
+    { "qpara", 'p', 0, G_OPTION_ARG_NONE, pswit+QPARA_SWITCH,
+      "Require closure of quotes on every paragraph", NULL },
+    { "relaxed", 'x', 0, G_OPTION_ARG_NONE, pswit+PARANOID_SWITCH,
+      "Disable paranoid querying of everything", NULL },
+    { "line-end", 'l', 0, G_OPTION_ARG_NONE, pswit+LINE_END_SWITCH,
+      "Disable line end checking", NULL },
+    { "overview", 'o', 0, G_OPTION_ARG_NONE, pswit+OVERVIEW_SWITCH,
+      "Overview: just show counts", NULL },
+    { "stdout", 'y', 0, G_OPTION_ARG_NONE, pswit+STDOUT_SWITCH,
+      "Output errors to stdout instead of stderr", NULL },
+    { "header", 'h', 0, G_OPTION_ARG_NONE, pswit+HEADER_SWITCH,
+      "Echo header fields", NULL },
+    { "markup", 'm', 0, G_OPTION_ARG_NONE, pswit+MARKUP_SWITCH,
+      "Ignore markup in < >", NULL },
+    { "usertypo", 'u', 0, G_OPTION_ARG_NONE, pswit+USERTYPO_SWITCH,
+      "Use file of user-defined typos", NULL },
+    { "web", 'w', 0, G_OPTION_ARG_NONE, pswit+WEB_SWITCH,
+      "Defaults for use on www upload", NULL },
+    { "verbose", 'v', 0, G_OPTION_ARG_NONE, pswit+VERBOSE_SWITCH,
+      "Verbose - list everything", NULL },
+    { NULL }
+};
 
 long cnt_dquot;		/* for overview mode, count of doublequote queries */
 long cnt_squot;		/* for overview mode, count of singlequote queries */
@@ -340,47 +342,26 @@
 long linecnt;		/* count of total lines in the file */
 long checked_linecnt;	/* count of lines actually checked */
 
-void proghelp(void);
-void procfile(char *);
+void proghelp(GOptionContext *context);
+void procfile(const char *);
 
-#define LOW_THRESHOLD    0
-#define HIGH_THRESHOLD   1
+gchar *running_from;
 
-#define START 0
-#define END 1
-#define PREV 0
-#define NEXT 1
-#define FIRST_OF_PAIR 0
-#define SECOND_OF_PAIR 1
-
-#define MAX_WORDPAIR 1000
-
-char running_from[MAX_PATH];
-
-int mixdigit(char *);
-const char *getaword(const char *,char *);
-int matchword(char *,char *);
-char *flgets(char *,int,FILE *,long);
-void lowerit(char *);
-int gcisalpha(unsigned char);
-int gcisdigit(unsigned char);
-int gcisletter(unsigned char);
-char *gcstrchr(char *s,char c);
+int mixdigit(const char *);
+gchar *getaword(const char **);
+char *flgets(char **,long);
+gboolean gcisalpha(unsigned char);
+gboolean gcisdigit(unsigned char);
+gboolean gcisletter(unsigned char);
 void postprocess_for_HTML(char *);
 char *linehasmarkup(char *);
 char *losemarkup(char *);
-int tagcomp(char *,char *);
+int tagcomp(const char *,const char *);
 char *loseentities(char *);
-int isroman(char *);
-int usertypo_count;
+gboolean isroman(const char *);
 void postprocess_for_DP(char *);
 
-char wrk[LINEBUFSIZE];
-
-#define MAX_QWORD 50
-#define MAX_QWORD_LENGTH 40
-char qword[MAX_QWORD][MAX_QWORD_LENGTH];
-int dupcnt[MAX_QWORD];
+GTree *qword,*qperiod;
 
 struct first_pass_results {
     long firstline,astline;
@@ -392,7 +373,8 @@
 
 struct warnings {
     int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;
-    int endquote,isDutch,isFrench;
+    int endquote;
+    gboolean isDutch,isFrench;
 };
 
 struct counters {
@@ -411,52 +393,35 @@
 };
 
 struct pending {
-    char dquote[80],squote[80],rbrack[80],sbrack[80],cbrack[80],unders[80];
+    char *dquote,*squote,*rbrack,*sbrack,*cbrack,*unders;
     long squot;
 };
 
-int main(int argc,char **argv)
+void parse_options(int *argc,char ***argv)
 {
-    char *argsw,*s;
-    int i,switno,invarg;
-    char usertypo_file[MAX_PATH];
-    FILE *usertypofile;
-    if (strlen(argv[0])<sizeof(running_from))
-	/* save the path to the executable */
-	strcpy(running_from,argv[0]);
-    /* find out what directory we're running from */
-    s=running_from+strlen(running_from);
-    for (;*s!='/' && *s!='\\' && s>=running_from;s--)
-	*s=0;
-    switno=strlen(SWITCHES);
-    for (i=switno;--i>0;)
-	pswit[i]=0;	   /* initialise switches */
-    /*
-     * Standard loop to extract switches.
-     * When we come out of this loop, the arguments will be
-     * in argv[0] upwards and the switches used will be
-     * represented by their equivalent elements in pswit[]
-     */
-    while (--argc>0 && **++argv=='-')
-	for (argsw=argv[0]+1;*argsw!='\0';argsw++)
-	    for (i=switno,invarg=1;(--i>=0) && invarg==1;)
-		if ((toupper(*argsw))==SWITCHES[i])
-		{
-		    invarg=0;
-		    pswit[i]=1;
-		}
+    GError *err=NULL;
+    GOptionContext *context;
+    context=g_option_context_new(
+      "file - looks for errors in Project Gutenberg(TM) etexts");
+    g_option_context_add_main_entries(context,options,NULL);
+    if (!g_option_context_parse(context,argc,argv,&err))
+    {
+	g_printerr("Bookloupe: %s\n",err->message);
+	g_printerr("Use \"%s --help\" for help\n",(*argv)[0]);
+	exit(1);
+    }
     /* Paranoid checking is turned OFF, not on, by its switch */
-    pswit[PARANOID_SWITCH]^=1;
+    pswit[PARANOID_SWITCH]=!pswit[PARANOID_SWITCH];
     if (pswit[PARANOID_SWITCH])
-	/* if running in paranoid mode force typo checks as well   */
-	pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1;
+	/* if running in paranoid mode, typo checks default to enabled */
+	pswit[TYPO_SWITCH]=!pswit[TYPO_SWITCH];
     /* Line-end checking is turned OFF, not on, by its switch */
-    pswit[LINE_END_SWITCH]^=1;
+    pswit[LINE_END_SWITCH]=!pswit[LINE_END_SWITCH];
     /* Echoing is turned OFF, not on, by its switch */
-    pswit[ECHO_SWITCH]^=1;
+    pswit[ECHO_SWITCH]=!pswit[ECHO_SWITCH];
     if (pswit[OVERVIEW_SWITCH])
 	/* just print summary; don't echo */
-	pswit[ECHO_SWITCH]=0;
+	pswit[ECHO_SWITCH]=FALSE;
     /*
      * Web uploads - for the moment, this is really just a placeholder
      * until we decide what processing we really want to do on web uploads
@@ -464,85 +429,155 @@
     if (pswit[WEB_SWITCH])
     {
 	/* specific override for web uploads */
-	pswit[ECHO_SWITCH]=1;
-	pswit[SQUOTE_SWITCH]=0;
-	pswit[TYPO_SWITCH]=1;
-	pswit[QPARA_SWITCH]=0;
-	pswit[PARANOID_SWITCH]=1;
-	pswit[LINE_END_SWITCH]=0;
-	pswit[OVERVIEW_SWITCH]=0;
-	pswit[STDOUT_SWITCH]=0;
-	pswit[HEADER_SWITCH]=1;
-	pswit[VERBOSE_SWITCH]=0;
-	pswit[MARKUP_SWITCH]=0;
-	pswit[USERTYPO_SWITCH]=0;
-	pswit[DP_SWITCH]=0;
+	pswit[ECHO_SWITCH]=TRUE;
+	pswit[SQUOTE_SWITCH]=FALSE;
+	pswit[TYPO_SWITCH]=TRUE;
+	pswit[QPARA_SWITCH]=FALSE;
+	pswit[PARANOID_SWITCH]=TRUE;
+	pswit[LINE_END_SWITCH]=FALSE;
+	pswit[OVERVIEW_SWITCH]=FALSE;
+	pswit[STDOUT_SWITCH]=FALSE;
+	pswit[HEADER_SWITCH]=TRUE;
+	pswit[VERBOSE_SWITCH]=FALSE;
+	pswit[MARKUP_SWITCH]=FALSE;
+	pswit[USERTYPO_SWITCH]=FALSE;
+	pswit[DP_SWITCH]=FALSE;
     }
-    if (argc<MINARGS || argc>MAXARGS)
+    if (*argc<2)
     {
-	/* check number of args */
-	proghelp();
-	return 1;
+	proghelp(context);
+	exit(1);
     }
-    /* read in the user-defined stealth scanno list */
+    g_option_context_free(context);
+}
+
+/*
+ * read_user_scannos:
+ *
+ * Read in the user-defined stealth scanno list.
+ */
+void read_user_scannos(void)
+{
+    GError *err=NULL;
+    gchar *usertypo_file;
+    gboolean okay;
+    int i;
+    gsize len;
+    gchar *contents,**lines;
+    usertypo_file=g_strdup("bookloupe.typ");
+    okay=file_get_contents_text(usertypo_file,&contents,&len,&err);
+    if (g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))
+    {
+	g_clear_error(&err);
+	g_free(usertypo_file);
+	usertypo_file=g_build_filename(running_from,"bookloupe.typ",NULL);
+	okay=file_get_contents_text(usertypo_file,&contents,&len,&err);
+    }
+    if (g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))
+    {
+	g_clear_error(&err);
+	g_free(usertypo_file);
+	usertypo_file=g_strdup("gutcheck.typ");
+	okay=file_get_contents_text(usertypo_file,&contents,&len,&err);
+    }
+    if (g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))
+    {
+	g_clear_error(&err);
+	g_free(usertypo_file);
+	usertypo_file=g_build_filename(running_from,"gutcheck.typ",NULL);
+	okay=file_get_contents_text(usertypo_file,&contents,&len,&err);
+    }
+    if (g_error_matches(err,G_FILE_ERROR,G_FILE_ERROR_NOENT))
+    {
+	g_free(usertypo_file);
+	printf("   --> I couldn't find bookloupe.typ "
+	  "-- proceeding without user typos.\n");
+	return;
+    }
+    else if (!okay)
+    {
+	fprintf(stderr,"%s: %s\n",usertypo_file,err->message);
+	g_free(usertypo_file);
+	g_clear_error(&err);
+	exit(1);
+    }
+    lines=g_strsplit(contents,"\n",0);
+    usertypo=g_tree_new_full((GCompareDataFunc)strcmp,NULL,g_free,NULL);
+    for (i=0;lines[i];i++)
+	if (*(unsigned char *)lines[i]>'!')
+	    g_tree_insert(usertypo,lines[i],GINT_TO_POINTER(1));
+	else
+	    g_free(lines[i]);
+    g_free(lines);
+}
+
+#if 0
+/*
+ * read_etext:
+ *
+ * Read an etext returning an array of lines. Lines are normally expected
+ * to be terminated by CR LF. Solitary LFs delimit lines but are left
+ * embedded at the end of the line for further processing. Solitary CRs
+ * do not delimit lines.
+ */
+gchar **read_etext(const char *filename,GError **err)
+{
+    int i;
+    const char *s,*t;
+    gchar *contents;
+    gchar **raw_lines;
+    GPtrArray *lines;
+    gsize len;
+    if (!g_file_get_contents(filename,&contents,&len,err))
+	return NULL;
+    raw_lines=g_strsplit(contents,"\r\n",0);
+    lines=g_ptr_array_sized_new(g_strv_length(raw_lines)+1);
+    for (i=0;raw_lines[i];i++)
+    {
+	t=strchr(raw_lines[i],'\n');
+	if (t)
+	{
+	    s=raw_lines[i];
+	    while ((t=strchr(s,'\n')))
+	    {
+		g_ptr_array_add(lines,g_strndup(s,t-s+1));
+		s=t+1;
+	    }
+	    g_ptr_array_add(lines,g_strdup(s));
+	    g_free(raw_lines[i]);
+	}
+	else
+	    g_ptr_array_add(lines,raw_lines[i]);
+    }
+    g_free(raw_lines);
+    g_ptr_array_add(lines,NULL);
+    return (gchar **)g_ptr_array_free(lines,FALSE);
+}
+#else
+/*
+ * read_etext:
+ *
+ * Read an etext returning a newly allocated string containing the file
+ * contents or NULL on error.
+ */
+gchar *read_etext(const char *filename,GError **err)
+{
+    gchar *contents;
+    gsize len;
+    if (!g_file_get_contents(filename,&contents,&len,err))
+	return NULL;
+    return contents;
+}
+#endif
+
+int main(int argc,char **argv)
+{
+    running_from=g_path_get_dirname(argv[0]);
+    parse_options(&argc,&argv);
     if (pswit[USERTYPO_SWITCH])
-    {
-	/* ... we were told we had one! */
-	usertypofile=fopen(USERTYPO_FILE,"rb");
-	if (!usertypofile)
-	{
-	    /* not in cwd. try excuteable directory. */
-	    strcpy(usertypo_file,running_from);
-	    strcat(usertypo_file,USERTYPO_FILE);
-	    usertypofile=fopen(usertypo_file,"rb");
-	    if (!usertypofile) {
-		/* we ain't got no user typo file! */
-		printf("   --> I couldn't find gutcheck.typ "
-		  "-- proceeding without user typos.\n");
-	    }
-	}
-	usertypo_count=0;
-	if (usertypofile)
-	{
-	    /* we managed to open a User Typo File! */
-	    if (pswit[USERTYPO_SWITCH])
-	    {
-		while (flgets(aline,LINEBUFSIZE-1,usertypofile,
-		  (long)usertypo_count))
-		{
-		    if (strlen(aline)>1)
-		    {
-			if ((int)*aline>33)
-			{
-			    s=malloc(strlen(aline)+1);
-			    if (!s)
-			    {
-				fprintf(stderr,"bookloupe: cannot get enough "
-				  "memory for user typo file!\n");
-				exit(1);
-			    }
-			    strcpy(s,aline);
-			    usertypo[usertypo_count]=s;
-			    usertypo_count++;
-			    if (usertypo_count>=MAX_USER_TYPOS)
-			    {
-				printf("   --> Only %d user-defined typos "
-				  "allowed: ignoring the rest\n",
-				  MAX_USER_TYPOS);
-				break;
-			    }
-			}
-		    }
-		}
-	    }
-	    fclose(usertypofile);
-	}
-    }
+	read_user_scannos();
     fprintf(stderr,"bookloupe: Check and report on an e-text\n");
-    cnt_dquot=cnt_squot=cnt_brack=cnt_bin=cnt_odd=cnt_long=
-    cnt_short=cnt_punct=cnt_dash=cnt_word=cnt_html=cnt_lineend=
-    cnt_spacend=0;
-    procfile(argv[0]);
+    procfile(argv[1]);
     if (pswit[OVERVIEW_SWITCH])
     {
 	printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
@@ -577,6 +612,9 @@
 	  cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
 	  cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
     }
+    g_free(running_from);
+    if (usertypo)
+	g_tree_unref(usertypo);
     return 0;
 }
 
@@ -588,28 +626,33 @@
  * occur many times in the text like long or short
  * lines, non-standard dashes, etc.
  */
-struct first_pass_results *first_pass(FILE *infile)
+struct first_pass_results *first_pass(const char *etext)
 {
     char laststart=CHAR_SPACE;
     const char *s;
-    int i,llen;
+    gchar *lc_line;
+    int i,j,llen;
+    gchar **lines;
     unsigned int lastlen=0,lastblen=0;
     long spline=0,nspline=0;
     static struct first_pass_results results={0};
-    char inword[MAXWORDLEN]="";
-    while (fgets(aline,LINEBUFSIZE-1,infile))
+    gchar *inword;
+    lines=g_strsplit(etext,"\n",0);
+    for (j=0;lines[j];j++)
     {
-	while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13)
-	    aline[strlen(aline)-1]=0;
+	llen=strlen(lines[j]);
+	while (llen>0 && lines[j][llen-1]=='\r')
+	    lines[j][--llen]='\0';	/* strip trailing CR(s) */
 	linecnt++;
-	if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") &&
-	  (strstr(aline,"PUBLIC DOMAIN") || strstr(aline,"COPYRIGHT")))
+	if (strstr(lines[j],"*END") && strstr(lines[j],"SMALL PRINT") &&
+	  (strstr(lines[j],"PUBLIC DOMAIN") || strstr(lines[j],"COPYRIGHT")))
 	{
 	    if (spline)
 		printf("   --> Duplicate header?\n");
 	    spline=linecnt+1;   /* first line of non-header text, that is */
 	}
-	if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG"))
+	if (!strncmp(lines[j],"*** START",9) &&
+	  strstr(lines[j],"PROJECT GUTENBERG"))
 	{
 	    if (nspline)
 		printf("   --> Duplicate header?\n");
@@ -617,10 +660,10 @@
 	}
 	if (spline || nspline)
 	{
-	    lowerit(aline);
-	    if (strstr(aline,"end") && strstr(aline,"project gutenberg"))
+	    lc_line=g_ascii_strdown(lines[j],llen);
+	    if (strstr(lc_line,"end") && strstr(lc_line,"project gutenberg"))
 	    {
-		if (strstr(aline,"end")<strstr(aline,"project gutenberg"))
+		if (strstr(lc_line,"end")<strstr(lc_line,"project gutenberg"))
 		{
 		    if (results.footerline)
 		    {
@@ -632,6 +675,7 @@
 			results.footerline=linecnt;
 		}
 	    }
+	    g_free(lc_line);
 	}
 	if (spline)
 	    results.firstline=spline;
@@ -639,85 +683,83 @@
 	    results.firstline=nspline;  /* override with new */
 	if (results.footerline)
 	    continue;    /* don't count the boilerplate in the footer */
-	llen=strlen(aline);
 	results.totlen+=llen;
 	for (i=0;i<llen;i++)
 	{
-	    if ((unsigned char)aline[i]>127)
+	    if ((unsigned char)lines[j][i]>127)
 		results.binlen++;
-	    if (gcisalpha(aline[i]))
+	    if (gcisalpha(lines[j][i]))
 		results.alphalen++;
-	    if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
+	    if (i>0 && lines[j][i]==CHAR_DQUOTE && isalpha(lines[j][i-1]))
 		results.endquote_count++;
 	}
-	if (strlen(aline)>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE &&
-	  lastblen>2 && lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
+	if (llen>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE && lastblen>2 &&
+	  lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
 	    results.shortline++;
-	if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE)
+	if (llen>0 && (unsigned char)lines[j][llen-1]<=CHAR_SPACE)
 	    cnt_spacend++;
-	if (strstr(aline,".,"))
+	if (strstr(lines[j],".,"))
 	    results.dotcomma++;
 	/* only count ast lines for ignoring purposes where there is */
 	/* locase text on the line */
-	if (strstr(aline,"*"))
+	if (strchr(lines[j],'*'))
 	{
-	    for (s=aline;*s;s++)
+	    for (s=lines[j];*s;s++)
 		if (*s>='a' && *s<='z')
 		    break;
 	     if (*s)
 		results.astline++;
 	}
-	if (strstr(aline,"/"))
+	if (strchr(lines[j],'/'))
 	    results.fslashline++;
-	for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
+	for (i=llen-1;i>0 && (unsigned char)lines[j][i]<=CHAR_SPACE;i--)
 	    ;
-	if (aline[i]=='-' && aline[i-1]!='-')
+	if (i>1 && lines[j][i]=='-' && lines[j][i-1]!='-')
 	    results.hyphens++;
 	if (llen>LONGEST_PG_LINE)
 	    results.longline++;
 	if (llen>WAY_TOO_LONG)
 	    results.verylongline++;
-	if (strstr(aline,"<") && strstr(aline,">"))
+	if (strchr(lines[j],'<') && strchr(lines[j],'>'))
 	{
-	    i=(int)(strstr(aline,">")-strstr(aline,"<")+1);
+	    i=(int)(strchr(lines[j],'>')-strchr(lines[j],'<')+1);
 	    if (i>0)
 		results.htmcount++;
-	    if (strstr(aline,"<i>"))
+	    if (strstr(lines[j],"<i>"))
 		results.htmcount+=4; /* bonus marks! */
 	}
 	/* Check for spaced em-dashes */
-	if (strstr(aline,"--"))
+	if (lines[j][0] && (s=strstr(lines[j]+1,"--")))
 	{
 	    results.emdash++;
-	    if (*(strstr(aline,"--")-1)==CHAR_SPACE ||
-	       (*(strstr(aline,"--")+2)==CHAR_SPACE))
+	    if (s[-1]==CHAR_SPACE || (s[2]==CHAR_SPACE))
 		results.space_emdash++;
-	    if (*(strstr(aline,"--")-1)==CHAR_SPACE &&
-	       (*(strstr(aline,"--")+2)==CHAR_SPACE))
+	    if (s[-1]==CHAR_SPACE && (s[2]==CHAR_SPACE))
 		/* count of em-dashes with spaces both sides */
 		results.non_PG_space_emdash++;
-	    if (*(strstr(aline,"--")-1)!=CHAR_SPACE &&
-	       (*(strstr(aline,"--")+2)!=CHAR_SPACE))
+	    if (s[-1]!=CHAR_SPACE && (s[2]!=CHAR_SPACE))
 		/* count of PG-type em-dashes with no spaces */
 		results.PG_space_emdash++;
 	}
-	for (s=aline;*s;)
+	for (s=lines[j];*s;)
 	{
-	    s=getaword(s,inword);
+	    inword=getaword(&s);
 	    if (!strcmp(inword,"hij") || !strcmp(inword,"niet")) 
 		results.Dutchcount++;
 	    if (!strcmp(inword,"dans") || !strcmp(inword,"avec")) 
 		results.Frenchcount++;
 	    if (!strcmp(inword,"0") || !strcmp(inword,"1")) 
 		results.standalone_digit++;
+	    g_free(inword);
 	}
 	/* Check for spaced dashes */
-	if (strstr(aline," -") && *(strstr(aline," -")+2)!='-')
+	if (strstr(lines[j]," -") && *(strstr(lines[j]," -")+2)!='-')
 	    results.spacedash++;
 	lastblen=lastlen;
-	lastlen=strlen(aline);
-	laststart=aline[0];
+	lastlen=llen;
+	laststart=lines[j][0];
     }
+    g_strfreev(lines);
     return &results;
 }
 
@@ -856,17 +898,17 @@
 	  "Not reporting them.\n");
 	warnings.bin=0;
     }
-    warnings.isDutch=0;
+    warnings.isDutch=FALSE;
     if (results->Dutchcount>50)
     {
-	warnings.isDutch=1;
+	warnings.isDutch=TRUE;
 	printf("   --> This looks like Dutch - "
 	  "switching off dashes and warnings for 's Middags case.\n");
     }
-    warnings.isFrench=0;
+    warnings.isFrench=FALSE;
     if (results->Frenchcount>50)
     {
-	warnings.isFrench=1;
+	warnings.isFrench=TRUE;
 	printf("   --> This looks like French - "
 	  "switching off some doublepunct.\n");
     }
@@ -919,12 +961,14 @@
  * count it, since empty lines with asterisks or dashes to
  * separate sections are common.
  *
- * Returns: Non-zero if the line is empty.
+ * Returns: TRUE if the line is empty.
  */
-int analyse_quotes(const char *s,struct counters *counters)
+gboolean analyse_quotes(const char *aline,struct counters *counters)
 {
     int guessquote=0;
-    int isemptyline=1;    /* assume the line is empty until proven otherwise */
+    /* assume the line is empty until proven otherwise */
+    gboolean isemptyline=TRUE;
+    const char *s=aline;
     while (*s)
     {
 	if (*s==CHAR_DQUOTE)
@@ -986,7 +1030,7 @@
 	}
 	if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
 	  *s!=13 && *s!=10)
-	    isemptyline=0;  /* ignore lines like  *  *  *  as spacers */
+	    isemptyline=FALSE;  /* ignore lines like  *  *  *  as spacers */
 	if (*s==CHAR_UNDERSCORE)
 	    counters->c_unders++;
 	if (*s==CHAR_OPEN_CBRACK)
@@ -1040,7 +1084,7 @@
  * Check for binary and other odd characters.
  */
 void check_for_odd_characters(const char *aline,const struct warnings *warnings,
-  int isemptyline)
+  gboolean isemptyline)
 {
     /* Don't repeat multiple warnings on one line. */
     int eNon_A=0,eTab=0,eTilde=0,eCarat=0,eFSlash=0,eAst=0;
@@ -1461,16 +1505,15 @@
 void check_for_extra_period(const char *aline,const struct warnings *warnings)
 {
     const char *s,*t,*s1;
-    int i,istypo,isdup;
-    static char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
-    static int qperiod_index=0;
-    char testword[MAXWORDLEN]="";
+    int i;
+    gboolean istypo;
+    gchar *testword;
     if (pswit[PARANOID_SWITCH])
     {
-	for (t=s=aline;strstr(t,". ");)
+	for (t=aline;strstr(t,". ");)
 	{
 	    t=strstr(t,". ");
-	    if (t==s)
+	    if (t==aline)
 	    {
 		t++;
 		/* start of line punctuation is handled elsewhere */
@@ -1497,57 +1540,48 @@
 	    if (*s1>='a' && *s1<='z')
 	    {
 		/* we have something to investigate */
-		istypo=1;
+		istypo=TRUE;
 		/* so let's go back and find out */
-		for (s1=t-1;s1>=s &&
+		for (s1=t-1;s1>=aline &&
 		  (gcisalpha(*s1) || gcisdigit(*s1) || *s1==CHAR_SQUOTE &&
 		  gcisalpha(s1[1]) && gcisalpha(s1[-1]));s1--)
 		    ;
 		s1++;
-		for (i=0;*s1 && *s1!='.';s1++,i++)
-		    testword[i]=*s1;
-		testword[i]=0;
+		s=strchr(s1,'.');
+		if (s)
+		    testword=g_strndup(s1,s-s1);
+		else
+		    testword=g_strdup(s1);
 		for (i=0;*abbrev[i];i++)
 		    if (!strcmp(testword,abbrev[i]))
-			istypo=0;
+			istypo=FALSE;
 		if (gcisdigit(*testword))
-		    istypo=0;
+		    istypo=FALSE;
 		if (!testword[1])
-		    istypo=0;
+		    istypo=FALSE;
 		if (isroman(testword))
-		    istypo=0;
+		    istypo=FALSE;
 		if (istypo)
 		{
-		    istypo=0;
+		    istypo=FALSE;
 		    for (i=0;testword[i];i++)
 			if (strchr(vowels,testword[i]))
-			    istypo=1;
+			    istypo=TRUE;
 		}
-		if (istypo)
+		if (istypo &&
+		  (pswit[VERBOSE_SWITCH] || !g_tree_lookup(qperiod,testword)))
 		{
-		    isdup=0;
-		    if (strlen(testword)<MAX_QWORD_LENGTH &&
-		      !pswit[VERBOSE_SWITCH])
-			for (i=0;i<qperiod_index;i++)
-			    if (!strcmp(testword,qperiod[i]))
-				isdup=1;
-		    if (!isdup)
-		    {
-			if (qperiod_index<MAX_QWORD &&
-			  strlen(testword)<MAX_QWORD_LENGTH)
-			{
-			    strcpy(qperiod[qperiod_index],testword);
-			    qperiod_index++;
-			}
-			if (pswit[ECHO_SWITCH])
-			    printf("\n%s\n",aline);
-			if (!pswit[OVERVIEW_SWITCH])
-			    printf("    Line %ld column %d - Extra period?\n",
-			      linecnt,(int)(t-aline)+1);
-			else
-			    cnt_punct++;
-		    }
+		    g_tree_insert(qperiod,g_strdup(testword),
+		      GINT_TO_POINTER(1));
+		    if (pswit[ECHO_SWITCH])
+			printf("\n%s\n",aline);
+		    if (!pswit[OVERVIEW_SWITCH])
+			printf("    Line %ld column %d - Extra period?\n",
+			  linecnt,(int)(t-aline)+1);
+		    else
+			cnt_punct++;
 		}
+		g_free(testword);
 	    }
 	    t++;
 	}
@@ -1563,16 +1597,20 @@
 {
     int i;
     const char *s,*wordstart;
-    char inword[MAXWORDLEN];
+    gchar *inword,*t;
     if (pswit[TYPO_SWITCH])
     {
 	for (s=aline;*s;)
 	{
 	    wordstart=s;
-	    s=getaword(s,inword);
-	    if (!*inword)
+	    t=getaword(&s);
+	    if (!*t)
+	    {
+		g_free(t);
 		continue;
-	    lowerit(inword);
+	    }
+	    inword=g_ascii_strdown(t,-1);
+	    g_free(t);
 	    for (i=0;*nocomma[i];i++)
 		if (!strcmp(inword,nocomma[i]))
 		{
@@ -1603,6 +1641,7 @@
 			    cnt_punct++;
 		    }
 		}
+	    g_free(inword);
 	}
     }
 }
@@ -1616,15 +1655,18 @@
 void check_for_typos(const char *aline,struct warnings *warnings)
 {
     const char *s,*wordstart;
-    char inword[MAXWORDLEN],testword[MAXWORDLEN];
-    int i,istypo,isdup,alower,vowel,consonant;
-    static int qword_index=0;
+    gchar *inword,*testword;
+    int i,alower,vowel,consonant,*dupcnt;
+    gboolean isdup,istypo;
     for (s=aline;*s;)
     {
 	wordstart=s;
-	s=getaword(s,inword);
+	inword=getaword(&s);
 	if (!*inword)
+	{
+	    g_free(inword);
 	    continue; /* don't bother with empty lines */
+	}
 	if (mixdigit(inword))
 	{
 	    if (pswit[ECHO_SWITCH])
@@ -1639,10 +1681,10 @@
 	 * Put the word through a series of tests for likely typos and OCR
 	 * errors.
 	 */
-	if (pswit[TYPO_SWITCH])
+	if (pswit[TYPO_SWITCH] || pswit[USERTYPO_SWITCH])
 	{
-	    istypo=0;
-	    strcpy(testword,inword);
+	    istypo=FALSE;
+	    testword=g_strdup(inword);
 	    alower=0;
 	    for (i=0;i<(int)strlen(testword);i++)
 	    {
@@ -1662,10 +1704,13 @@
 		      testword[2]=='c' || i>0 && testword[i-1]==CHAR_SQUOTE)
 			; /* do nothing! */
 		    else
-			istypo=1;
+			istypo=TRUE;
 		}
 		testword[i]=(char)tolower(testword[i]);
 	    }
+	}
+	if (pswit[TYPO_SWITCH])
+	{
 	    /*
 	     * Check for certain unlikely two-letter combinations at word
 	     * start and end.
@@ -1674,26 +1719,26 @@
 	    {
 		for (i=0;*nostart[i];i++)
 		    if (!strncmp(testword,nostart[i],2))
-			istypo=1;
+			istypo=TRUE;
 		for (i=0;*noend[i];i++)
 		    if (!strncmp(testword+strlen(testword)-2,noend[i],2))
-			istypo=1;
+			istypo=TRUE;
 	    }
 	    /* ght is common, gbt never. Like that. */
 	    if (strstr(testword,"cb"))
-		istypo=1;
+		istypo=TRUE;
 	    if (strstr(testword,"gbt"))
-		istypo=1;
+		istypo=TRUE;
 	    if (strstr(testword,"pbt"))
-		istypo=1;
+		istypo=TRUE;
 	    if (strstr(testword,"tbs"))
-		istypo=1;
+		istypo=TRUE;
 	    if (strstr(testword,"mrn"))
-		istypo=1;
+		istypo=TRUE;
 	    if (strstr(testword,"ahle"))
-		istypo=1;
+		istypo=TRUE;
 	    if (strstr(testword,"ihle"))
-		istypo=1;
+		istypo=TRUE;
 	    /*
 	     * "TBE" does happen - like HEARTBEAT - but uncommon.
 	     * Also "TBI" - frostbite, outbid - but uncommon.
@@ -1701,11 +1746,11 @@
 	     * numerals, but "ii" is a common scanno.
 	     */
 	    if (strstr(testword,"tbi"))
-		istypo=1;
+		istypo=TRUE;
 	    if (strstr(testword,"tbe"))
-		istypo=1;
+		istypo=TRUE;
 	    if (strstr(testword,"ii"))
-		istypo=1;
+		istypo=TRUE;
 	    /*
 	     * Check for no vowels or no consonants.
 	     * If none, flag a typo.
@@ -1727,7 +1772,7 @@
 			consonant++;
 		}
 		if (!vowel || !consonant)
-		    istypo=1;
+		    istypo=TRUE;
 	    }
 	    /*
 	     * Now exclude the word from being reported if it's in
@@ -1735,18 +1780,18 @@
 	     */
 	    for (i=0;*okword[i];i++)
 		if (!strcmp(testword,okword[i]))
-		    istypo=0;
+		    istypo=FALSE;
 	    /*
 	     * What looks like a typo may be a Roman numeral.
 	     * Exclude these.
 	     */
 	    if (istypo && isroman(testword))
-		istypo=0;
+		istypo=FALSE;
 	    /* Check the manual list of typos. */
 	    if (!istypo)
 		for (i=0;*typo[i];i++)
 		    if (!strcmp(testword,typo[i]))
-			istypo=1;
+			istypo=TRUE;
 	    /*
 	     * Check lowercase s, l, i and m - special cases.
 	     *   "j" - often a semi-colon gone wrong.
@@ -1754,34 +1799,30 @@
 	     *   "n" for "in"
 	     */
 	    if (!istypo && strlen(testword)==1 && strchr("slmijdn",*inword))
-		istypo=1;
+		istypo=TRUE;
 	    if (istypo)
 	    {
-		isdup=0;
-		if (strlen(testword)<MAX_QWORD_LENGTH &&
-		  !pswit[VERBOSE_SWITCH])
-		    for (i=0;i<qword_index;i++)
-			if (!strcmp(testword,qword[i]))
-			{
-			    isdup=1;
-			    ++dupcnt[i];
-			}
+		dupcnt=g_tree_lookup(qword,testword);
+		if (dupcnt)
+		{
+		    (*dupcnt)++;
+		    isdup=!pswit[VERBOSE_SWITCH];
+		}
+		else
+		{
+		    dupcnt=g_new0(int,1);
+		    g_tree_insert(qword,g_strdup(testword),dupcnt);
+		    isdup=FALSE;
+		}
 		if (!isdup)
 		{
-		    if (qword_index<MAX_QWORD &&
-		      strlen(testword)<MAX_QWORD_LENGTH)
-		    {
-			strcpy(qword[qword_index],testword);
-			qword_index++;
-		    }
 		    if (pswit[ECHO_SWITCH])
 			printf("\n%s\n",aline);
 		    if (!pswit[OVERVIEW_SWITCH])
 		    {
 			printf("    Line %ld column %d - Query word %s",
 			  linecnt,(int)(wordstart-aline)+1,inword);
-			if (strlen(testword)<MAX_QWORD_LENGTH &&
-			  !pswit[VERBOSE_SWITCH])
+			if (!pswit[VERBOSE_SWITCH])
 			    printf(" - not reporting duplicates");
 			printf("\n");
 		    }
@@ -1791,17 +1832,16 @@
 	    }
 	}
 	/* check the user's list of typos */
-	if (!istypo && usertypo_count)
-	    for (i=0;i<usertypo_count;i++)
-		if (!strcmp(testword,usertypo[i]))
-		{
-		    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",aline);
-		    if (!pswit[OVERVIEW_SWITCH])  
-			printf("    Line %ld column %d - "
-			  "Query possible scanno %s\n",
-			  linecnt,(int)(wordstart-aline)+2,inword);
-		}
+	if (!istypo && usertypo && g_tree_lookup(usertypo,testword))
+	{
+	    if (pswit[ECHO_SWITCH])
+		printf("\n%s\n",aline);
+	    if (!pswit[OVERVIEW_SWITCH])  
+		printf("    Line %ld column %d - Query possible scanno %s\n",
+		  linecnt,(int)(wordstart-aline)+2,inword);
+	}
+	if (pswit[TYPO_SWITCH] || pswit[USERTYPO_SWITCH])
+	    g_free(testword);
 	if (pswit[PARANOID_SWITCH] && warnings->digit)
 	{
 	    /* In paranoid mode, query all 0 and 1 standing alone. */
@@ -1816,6 +1856,7 @@
 		    cnt_word++;
 	    }
 	}
+	g_free(inword);
     }
 }
 
@@ -1830,9 +1871,10 @@
  * quotes "like"this.
  */
 void check_for_misspaced_punctuation(const char *aline,
-  struct parities *parities,int isemptyline)
+  struct parities *parities,gboolean isemptyline)
 {
-    int i,llen,isacro,isellipsis;
+    int i,llen;
+    gboolean isacro,isellipsis;
     const char *s;
     llen=strlen(aline);
     for (i=1;i<llen;i++)
@@ -1841,9 +1883,9 @@
 	if (strchr(".?!,;:_",aline[i]))  /* if it's punctuation */
 	{
 	    /* we need to suppress warnings for acronyms like M.D. */
-	    isacro=0;
+	    isacro=FALSE;
 	    /* we need to suppress warnings for ellipsis . . . */
-	    isellipsis=0;
+	    isellipsis=FALSE;
 	    /* if there are letters on both sides of it or ... */
 	    if (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1]) ||
 	       gcisalpha(aline[i+1]) && strchr("?!,;:",aline[i]))
@@ -1852,9 +1894,9 @@
 		if (aline[i]=='.')
 		{
 		    if (i>2 && aline[i-2]=='.')
-			isacro=1;
+			isacro=TRUE;
 		    if (i+2<llen && aline[i+2]=='.')
-			isacro=1;
+			isacro=TRUE;
 		}
 		if (!isacro)
 		{
@@ -1877,9 +1919,9 @@
 		if (aline[i]=='.')
 		{
 		    if (i>2 && aline[i-2]=='.')
-			isellipsis=1;
+			isellipsis=TRUE;
 		    if (i+2<llen && aline[i+2]=='.')
-			isellipsis=1;
+			isellipsis=TRUE;
 		}
 		if (!isemptyline && !isellipsis)
 		{
@@ -2177,6 +2219,8 @@
 void check_for_miscased_genative(const char *aline)
 {
     const char *s;
+    if (!*aline)
+	return;
     s=aline+1;
     while (*s)
     {
@@ -2321,13 +2365,11 @@
 	    i=(int)(close-open+1);
 	    if (i>0)
 	    {
-		strncpy(wrk,open,i);
-		wrk[i]=0;
 		if (pswit[ECHO_SWITCH])
 		    printf("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - HTML Tag? %s \n",
-		      linecnt,(int)(open-aline)+1,wrk);
+		    printf("    Line %ld column %d - HTML Tag? %*.*s \n",
+		      linecnt,(int)(open-aline)+1,i,i,open);
 		else
 		    cnt_html++;
 	    }
@@ -2359,13 +2401,11 @@
 		    i=0;		/* Don't report "Jones & Son;" */
 	    if (i>0)
 	    {
-		strncpy(wrk,amp,i);
-		wrk[i]=0;
 		if (pswit[ECHO_SWITCH])
 		    printf("\n%s\n",aline);
 		if (!pswit[OVERVIEW_SWITCH])
-		    printf("    Line %ld column %d - HTML symbol? %s \n",
-		      linecnt,(int)(amp-aline)+1,wrk);
+		    printf("    Line %ld column %d - HTML symbol? %*.*s \n",
+		      linecnt,(int)(amp-aline)+1,i,i,amp);
 		else
 		    cnt_html++;
 	    }
@@ -2388,7 +2428,8 @@
     s=aline;
     while (*s==' ')
 	s++;
-    if (*pending->dquote)
+    if (pending->dquote)
+    {
 	if (*s!=CHAR_DQUOTE || pswit[QPARA_SWITCH])
 	{
 	    if (!pswit[OVERVIEW_SWITCH])
@@ -2400,7 +2441,10 @@
 	    else
 		cnt_dquot++;
 	}
-    if (*pending->squote)
+	g_free(pending->dquote);
+	pending->dquote=NULL;
+    }
+    if (pending->squote)
     {
 	if (*s!=CHAR_SQUOTE && *s!=CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] ||
 	  pending->squot)
@@ -2414,8 +2458,10 @@
 	    else
 		cnt_squot++;
 	}
+	g_free(pending->squote);
+	pending->squote=NULL;
     }
-    if (*pending->rbrack)
+    if (pending->rbrack)
     {
 	if (!pswit[OVERVIEW_SWITCH])
 	{
@@ -2425,8 +2471,10 @@
 	}
 	else
 	    cnt_brack++;
+	g_free(pending->rbrack);
+	pending->rbrack=NULL;
     }
-    if (*pending->sbrack)
+    if (pending->sbrack)
     {
 	if (!pswit[OVERVIEW_SWITCH])
 	{
@@ -2436,8 +2484,10 @@
 	}
 	else
 	    cnt_brack++;
+	g_free(pending->sbrack);
+	pending->sbrack=NULL;
     }
-    if (*pending->cbrack)
+    if (pending->cbrack)
     {
 	if (!pswit[OVERVIEW_SWITCH])
 	{
@@ -2447,8 +2497,10 @@
 	}
 	else
 	    cnt_brack++;
+	g_free(pending->cbrack);
+	pending->cbrack=NULL;
     }
-    if (*pending->unders)
+    if (pending->unders)
     {
 	if (!pswit[OVERVIEW_SWITCH])
 	{
@@ -2458,6 +2510,8 @@
 	}
 	else
 	    cnt_brack++;
+	g_free(pending->unders);
+	pending->unders=NULL;
     }
 }
 
@@ -2481,12 +2535,12 @@
   struct pending *pending)
 {
     if (counters->quot%2)
-	sprintf(pending->dquote,"    Line %ld - Mismatched quotes",
-	  linecnt);
+	pending->dquote=
+	  g_strdup_printf("    Line %ld - Mismatched quotes",linecnt);
     if (pswit[SQUOTE_SWITCH] && counters->open_single_quote &&
       counters->open_single_quote!=counters->close_single_quote)
-	sprintf(pending->squote,"    Line %ld - Mismatched singlequotes?",
-	  linecnt);
+	pending->squote=
+	  g_strdup_printf("    Line %ld - Mismatched singlequotes?",linecnt);
     if (pswit[SQUOTE_SWITCH] && counters->open_single_quote &&
       counters->open_single_quote!=counters->close_single_quote &&
       counters->open_single_quote!=counters->close_single_quote+1)
@@ -2496,17 +2550,17 @@
 	 */
 	pending->squot=1;
     if (counters->r_brack)
-	sprintf(pending->rbrack,"    Line %ld - Mismatched round brackets?",
-	  linecnt);
+	pending->rbrack=
+	  g_strdup_printf("    Line %ld - Mismatched round brackets?",linecnt);
     if (counters->s_brack)
-	sprintf(pending->sbrack,"    Line %ld - Mismatched square brackets?",
-	  linecnt);
+	pending->sbrack=
+	  g_strdup_printf("    Line %ld - Mismatched square brackets?",linecnt);
     if (counters->c_brack)
-	sprintf(pending->cbrack,"    Line %ld - Mismatched curly brackets?",
-	  linecnt);
+	pending->cbrack=
+	  g_strdup_printf("    Line %ld - Mismatched curly brackets?",linecnt);
     if (counters->c_unders%2)
-	sprintf(pending->unders,"    Line %ld - Mismatched underscores?",
-	  linecnt);
+	pending->unders=
+	  g_strdup_printf("    Line %ld - Mismatched underscores?",linecnt);
 }
 
 /*
@@ -2563,50 +2617,63 @@
     }
 }
 
+gboolean report_duplicate_queries(gpointer key,gpointer value,gpointer data)
+{
+    const char *word=key;
+    int *dupcnt=value;
+    if (*dupcnt)
+	printf("\nNote: Queried word %s was duplicated %d times\n",
+	  word,*dupcnt);
+    return FALSE;
+}
+
 /*
  * procfile:
  *
  * Process one file.
  */
-void procfile(char *filename)
+void procfile(const char *filename)
 {
     const char *s;
-    char parastart[81];     /* first line of current para */
-    FILE *infile;
+    gchar *parastart=NULL;	/* first line of current para */
+    gchar *etext,*aline;
+    gchar *etext_ptr;
+    GError *err=NULL;
     struct first_pass_results *first_pass_results;
     struct warnings *warnings;
     struct counters counters={0};
     struct line_properties last={0};
     struct parities parities={0};
-    struct pending pending={{0},};
-    int isemptyline;
+    struct pending pending={0};
+    gboolean isemptyline;
     long start_para_line=0;
-    int i,isnewpara=0,enddash=0;
+    gboolean isnewpara=FALSE,enddash=FALSE;
     last.start=CHAR_SPACE;
-    *prevline=0;
     linecnt=checked_linecnt=0;
-    infile=fopen(filename,"rb");
-    if (!infile)
+    etext=read_etext(filename,&err);
+    if (!etext)
     {
 	if (pswit[STDOUT_SWITCH])
-	    fprintf(stdout,"bookloupe: cannot open %s\n",filename);
+	    fprintf(stdout,"bookloupe: %s: %s\n",filename,err->message);
 	else
-	    fprintf(stderr,"bookloupe: cannot open %s\n",filename);
+	    fprintf(stderr,"bookloupe: %s: %s\n",filename,err->message);
 	exit(1);
     }
     fprintf(stdout,"\n\nFile: %s\n\n",filename);
-    first_pass_results=first_pass(infile);
+    first_pass_results=first_pass(etext);
     warnings=report_first_pass(first_pass_results);
+    qword=g_tree_new_full((GCompareDataFunc)strcmp,NULL,g_free,g_free);
+    qperiod=g_tree_new_full((GCompareDataFunc)strcmp,NULL,g_free,NULL);
     /*
      * Here we go with the main pass. Hold onto yer hat!
      */
-    rewind(infile);
     linecnt=0;
-    while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
+    etext_ptr=etext;
+    while ((aline=flgets(&etext_ptr,linecnt+1)))
     {
 	linecnt++;
 	if (linecnt==1)
-	    isnewpara=1;
+	    isnewpara=TRUE;
 	if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11))
 	    continue;    // skip DP page separators completely
 	if (linecnt<first_pass_results->firstline ||
@@ -2635,8 +2702,8 @@
 	    /* This line is the start of a new paragraph. */
 	    start_para_line=linecnt;
 	    /* Capture its first line in case we want to report it later. */
-	    strncpy(parastart,aline,80);
-	    parastart[79]=0;
+	    g_free(parastart);
+	    parastart=g_strdup(aline);
 	    memset(&parities,0,sizeof(parities));  /* restart the quote count */
 	    s=aline;
 	    while (!gcisalpha(*s) && !gcisdigit(*s) && *s)
@@ -2653,7 +2720,7 @@
 		else
 		    cnt_punct++;
 	    }
-	    isnewpara=0; /* Signal the end of new para processing. */
+	    isnewpara=FALSE; /* Signal the end of new para processing. */
 	}
 	/* Check for an em-dash broken at line end. */
 	if (enddash && *aline=='-')
@@ -2665,11 +2732,11 @@
 	    else
 		cnt_punct++;
 	}
-	enddash=0;
+	enddash=FALSE;
 	for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--)
 	    ;
 	if (s>=aline && *s=='-')
-	    enddash=1;
+	    enddash=TRUE;
 	check_for_control_characters(aline);
 	if (warnings->bin)
 	    check_for_odd_characters(aline,warnings,isemptyline);
@@ -2709,40 +2776,49 @@
 	    check_for_mismatched_quotes(&counters,&pending);
 	    memset(&counters,0,sizeof(counters));
 	    /* let the next iteration know that it's starting a new para */
-	    isnewpara=1;
-	    check_for_omitted_punctuation(prevline,&last,start_para_line);
+	    isnewpara=TRUE;
+	    if (prevline)
+		check_for_omitted_punctuation(prevline,&last,start_para_line);
 	}
-	strcpy(prevline,aline);
+	g_free(prevline);
+	prevline=g_strdup(aline);
     }
-    fclose(infile);
+    if (prevline)
+    {
+	g_free(prevline);
+	prevline=NULL;
+    }
+    g_free(parastart);
+    g_free(prevline);
+    g_free(etext);
     if (!pswit[OVERVIEW_SWITCH])
-	for (i=0;i<MAX_QWORD;i++)
-	    if (dupcnt[i])
-		printf("\nNote: Queried word %s was duplicated %d time%s\n",
-		  qword[i],dupcnt[i],"s");
+	g_tree_foreach(qword,report_duplicate_queries,NULL);
+    g_tree_unref(qword);
+    g_tree_unref(qperiod);
 }
 
 /*
  * flgets:
  *
- * Get one line from the input stream, checking for
+ * Get one line from the input text, checking for
  * the existence of exactly one CR/LF line-end per line.
  *
  * Returns: a pointer to the line.
  */
-char *flgets(char *theline,int maxlen,FILE *thefile,long lcnt)
+char *flgets(char **etext,long lcnt)
 {
     char c;
-    int len,isCR,cint;
-    *theline=0;
-    len=isCR=0;
-    c=cint=fgetc(thefile);
-    do
+    int len;
+    gboolean isCR=FALSE;
+    char *theline=*etext;
+    len=0;
+    for(;;)
     {
-	if (cint==EOF)
+	c=*(*etext)++;
+	if (!c)
 	    return NULL;
 	/* either way, it's end of line */
-	if (c==10)
+	if (c=='\n')
 	{
 	    if (isCR)
 		break;
@@ -2752,7 +2828,7 @@
 		if (pswit[LINE_END_SWITCH])
 		{
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",theline);
+			printf("\n%*.*s\n",len,len,theline);
 		    if (!pswit[OVERVIEW_SWITCH])
 			printf("    Line %ld - No CR?\n",lcnt);
 		    else
@@ -2761,7 +2837,7 @@
 		break;
 	    }
 	}
-	if (c==13)
+	if (c=='\r')
 	{
 	    if (isCR)
 	    {
@@ -2769,34 +2845,33 @@
 		if (pswit[LINE_END_SWITCH])
 		{
 		    if (pswit[ECHO_SWITCH])
-			printf("\n%s\n",theline);
+			printf("\n%*.*s\n",len,len,theline);
 		    if (!pswit[OVERVIEW_SWITCH])
 			printf("    Line %ld - Two successive CRs?\n",lcnt);
 		    else
 			cnt_lineend++;
 		}
 	    }
-	    isCR=1;
+	    isCR=TRUE;
 	}
 	else
 	{
 	    if (pswit[LINE_END_SWITCH] && isCR)
 	    {
 		if (pswit[ECHO_SWITCH])
-		    printf("\n%s\n",theline);
+		    printf("\n%*.*s\n",len,len,theline);
 		if (!pswit[OVERVIEW_SWITCH])
 		    printf("    Line %ld column %d - CR without LF?\n",
 		      lcnt,len+1);
 		else
 		    cnt_lineend++;
+		theline[len]=' ';
 	    }
-	    theline[len]=c;
+	    isCR=FALSE;
 	    len++;
-	    theline[len]=0;
-	    isCR=0;
 	}
-	c=cint=fgetc(thefile);
-    } while(len<maxlen);
+    }
+    theline[len]='\0';
     if (pswit[MARKUP_SWITCH])  
 	postprocess_for_HTML(theline);
     if (pswit[DP_SWITCH])  
@@ -2813,10 +2888,10 @@
  *
  * Returns: 0 if no error found, 1 if error.
  */
-int mixdigit(char *checkword)
+int mixdigit(const char *checkword)
 {
     int wehaveadigit,wehavealetter,firstdigits,query,wl;
-    char *s;
+    const char *s;
     wehaveadigit=wehavealetter=query=0;
     for (s=checkword;*s;s++)
 	if (gcisalpha(*s))
@@ -2832,17 +2907,20 @@
 	for (firstdigits=0;gcisdigit(checkword[firstdigits]);firstdigits++)
 	    ;
 	/* digits, ending in st, rd, nd, th of either case */
-	if (firstdigits+2==wl && (matchword(checkword+wl-2,"st") ||
-	  matchword(checkword+wl-2,"rd") || matchword(checkword+wl-2,"nd") ||
-	  matchword(checkword+wl-2,"th")))
+	if (firstdigits+2==wl && (!g_ascii_strcasecmp(checkword+wl-2,"st") ||
+	  !g_ascii_strcasecmp(checkword+wl-2,"rd") ||
+	  !g_ascii_strcasecmp(checkword+wl-2,"nd") ||
+	  !g_ascii_strcasecmp(checkword+wl-2,"th")))
 	    query=0;
-	if (firstdigits+3==wl && (matchword(checkword+wl-3,"sts") ||
-	  matchword(checkword+wl-3,"rds") || matchword(checkword+wl-3,"nds") ||
-	  matchword(checkword+wl-3,"ths")))
+	if (firstdigits+3==wl && (!g_ascii_strcasecmp(checkword+wl-3,"sts") ||
+	  !g_ascii_strcasecmp(checkword+wl-3,"rds") ||
+	  !g_ascii_strcasecmp(checkword+wl-3,"nds") ||
+	  !g_ascii_strcasecmp(checkword+wl-3,"ths")))
 	    query=0;
-	if (firstdigits+3==wl && (matchword(checkword+wl-4,"stly") ||
-	  matchword(checkword+wl-4,"rdly") ||
-	  matchword(checkword+wl-4,"ndly") || matchword(checkword+wl-4,"thly")))
+	if (firstdigits+3==wl && (!g_ascii_strcasecmp(checkword+wl-4,"stly") ||
+	  !g_ascii_strcasecmp(checkword+wl-4,"rdly") ||
+	  !g_ascii_strcasecmp(checkword+wl-4,"ndly") ||
+	  !g_ascii_strcasecmp(checkword+wl-4,"thly")))
 	    query=0;
 	/* digits, ending in l, L, s or d */
 	if (firstdigits+1==wl && (checkword[wl-1]=='l' ||
@@ -2864,20 +2942,20 @@
 /*
  * getaword:
  *
- * Extracts the first/next "word" from the line, and puts
- * it into "thisword". A word is defined as one English word unit--or
- * at least that's the aim.
+ * Extracts the first/next "word" from the line, and returns it.
+ * A word is defined as one English word unit--or at least that's the aim.
+ * "ptr" is advanced to the position in the line where we will start
+ * looking for the next word.
  *
- * Returns: a pointer to the position in the line where we will start
- *	  looking for the next word.
+ * Returns: A newly-allocated string.
  */
-const char *getaword(const char *fromline,char *thisword)
+gchar *getaword(const char **ptr)
 {
-    int i,wordlen;
+    int i;
     const char *s;
-    wordlen=0;
-    for (;!gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline;
-      fromline++)
+    GString *word;
+    word=g_string_new(NULL);
+    for (;!gcisdigit(**ptr) && !gcisalpha(**ptr) && **ptr;(*ptr)++)
 	;
     /*
      * Use a look-ahead to handle exceptions for numbers like 1,000 and 1.35.
@@ -2887,64 +2965,25 @@
      * If found, it returns this whole pattern as a word; otherwise we discard
      * the results and resume our normal programming.
      */
-    s=fromline;
-    for (;(gcisdigit(*s) || gcisalpha(*s) || *s==',' || *s=='.') &&
-      wordlen<MAXWORDLEN;s++)
+    s=*ptr;
+    for (;gcisdigit(*s) || gcisalpha(*s) || *s==',' || *s=='.';s++)
+	g_string_append_c(word,*s);
+    for (i=1;i+1<word->len;i++)
     {
-	thisword[wordlen]=*s;
-	wordlen++;
-    }
-    thisword[wordlen]=0;
-    for (i=1;i<wordlen-1;i++)
-    {
-	if (thisword[i]=='.' || thisword[i]==',')
+	if (word->str[i]=='.' || word->str[i]==',')
 	{
-	    if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1]))
+	    if (gcisdigit(word->str[i-1]) && gcisdigit(word->str[i-1]))
 	    {
-		fromline=s;
-		return fromline;
+		*ptr=s;
+		return g_string_free(word,FALSE);
 	    }
 	}
     }
     /* we didn't find a punctuated number - do the regular getword thing */
-    wordlen=0;
-    for (;(gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline=='\'') &&
-      wordlen<MAXWORDLEN;fromline++)
-    {
-	thisword[wordlen]=*fromline;
-	wordlen++;
-    }
-    thisword[wordlen]=0;
-    return fromline;
-}
-
-/*
- * matchword:
- *
- * A case-insensitive string matcher.
- */
-int matchword(char *checkfor,char *thisword)
-{
-    unsigned int ismatch,i;
-    if (strlen(checkfor)!=strlen(thisword))
-	return 0;
-    ismatch=1;     /* assume a match until we find a difference */
-    for (i=0;i<strlen(checkfor);i++)
-	if (toupper(checkfor[i])!=toupper(thisword[i]))
-	    ismatch=0;
-    return ismatch;
-}
-
-/*
- * lowerit:
- *
- * Lowercase the line.
- */
-void lowerit(char *theline)
-{
-    for (;*theline;theline++)
-	if (*theline>='A' && *theline<='Z')
-	    *theline+=32;
+    g_string_truncate(word,0);
+    for (;gcisdigit(**ptr) || gcisalpha(**ptr) || **ptr=='\'';(*ptr)++)
+	g_string_append_c(word,**ptr);
+    return g_string_free(word,FALSE);
 }
 
 /*
@@ -2961,11 +3000,11 @@
  * XL or an optional XC, an optional IX or IV, an optional V and any number
  * of optional Is.
  */
-int isroman(char *t)
+gboolean isroman(const char *t)
 {
-    char *s;
+    const char *s;
     if (!t || !*t)
-	return 0;
+	return FALSE;
     s=t;
     while (*t=='m' && *t)
 	t++;
@@ -3006,19 +3045,19 @@
  * errors. gcisalpha() recognizes accented letters from the CP1252 (Windows)
  * and ISO-8859-1 character sets, which are the most common PG 8-bit types.
  */
-int gcisalpha(unsigned char c)
+gboolean gcisalpha(unsigned char c)
 {
     if (c>='a' && c<='z')
-	return 1;
+	return TRUE;
     if (c>='A' && c<='Z')
-	return 1;
+	return TRUE;
     if (c<140)
-	return 0;
+	return FALSE;
     if (c>=192 && c!=208 && c!=215 && c!=222 && c!=240 && c!=247 && c!=254)
-	return 1;
+	return TRUE;
     if (c==140 || c==142 || c==156 || c==158 || c==159)
-	return 1;
-    return 0;
+	return TRUE;
+    return FALSE;
 }
 
 /*
@@ -3026,7 +3065,7 @@
  *
  * A version of isdigit() that doesn't get confused in 8-bit texts.
  */
-int gcisdigit(unsigned char c)
+gboolean gcisdigit(unsigned char c)
 {   
     return c>='0' && c<='9';
 }
@@ -3037,24 +3076,12 @@
  * A version of isletter() that doesn't get confused in 8-bit texts.
  * NB: this is ISO-8891-1-specific.
  */
-int gcisletter(unsigned char c)
+gboolean gcisletter(unsigned char c)
 {   
     return c>='A' && c<='Z' || c>='a' && c<='z' || c>=192;
 }
 
 /*
- * gcstrchr:
- *
- * Wraps strchr to return NULL if the character being searched for is zero.
- */
-char *gcstrchr(char *s,char c)
-{
-    if (!c)
-	return NULL;
-    return strchr(s,c);
-}
-
-/*
  * postprocess_for_DP:
  *
  * Invoked with the -d switch from flgets().
@@ -3097,7 +3124,7 @@
  */
 void postprocess_for_HTML(char *theline)
 {
-    if (strstr(theline,"<") && strstr(theline,">"))
+    if (strchr(theline,'<') && strchr(theline,'>'))
 	while (losemarkup(theline))
 	    ;
     while (loseentities(theline))
@@ -3171,9 +3198,9 @@
     return NULL;
 }
 
-int tagcomp(char *strin,char *basetag)
+int tagcomp(const char *strin,const char *basetag)
 {
-    char *s,*t;
+    const char *s,*t;
     s=basetag;
     t=strin;
     if (*t=='/')
@@ -3188,8 +3215,9 @@
     return 0;
 }
 
-void proghelp()
+void proghelp(GOptionContext *context)
 {
+    gchar *help;
     fputs("Bookloupe version " PACKAGE_VERSION ".\n",stderr);
     fputs("Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
     fputs("Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>.\n",stderr);
@@ -3198,22 +3226,10 @@
     fputs("This is Free Software; "
       "you may redistribute it under certain conditions (GPL);\n",stderr);
     fputs("read the file COPYING for details.\n\n",stderr);
-    fputs("Usage is: bookloupe [-setpxloyhud] filename\n",stderr);
-    fputs("  where -s checks single quotes, -e suppresses echoing lines, "
-      "-t checks typos\n",stderr);
-    fputs("  -x (paranoid) switches OFF -t and extra checks, "
-      "-l turns OFF line-end checks\n",stderr);
-    fputs("  -o just displays overview without detail, "
-      "-h echoes header fields\n",stderr);
-    fputs("  -v (verbose) unsuppresses duplicate reporting, "
-      "-m suppresses markup\n",stderr);
-    fputs("  -d ignores DP-specific markup,\n",stderr);
-    fputs("  -u uses a file gutcheck.typ to query user-defined "
-      "possible typos\n",stderr);
-    fputs("Sample usage: bookloupe warpeace.txt \n",stderr);
-    fputs("\n",stderr);
-    fputs("Bookloupe looks for errors in Project Gutenberg(TM) etexts.\n",
-      stderr);
+    help=g_option_context_get_help(context,TRUE,NULL);
+    fputs(help,stderr);
+    g_free(help);
+    fputs("Sample usage: bookloupe warpeace.txt\n\n",stderr);
     fputs("Bookloupe queries anything it thinks shouldn't be in a PG text; "
       "non-ASCII\n",stderr);
     fputs("characters like accented letters, "
diff -r adb087007d08 -r 1016349e619f configure.ac
--- a/configure.ac	Mon May 27 09:03:04 2013 +0100
+++ b/configure.ac	Tue May 28 15:17:19 2013 +0100
@@ -13,7 +13,8 @@
 test/compatibility/Makefile
 doc/Makefile
 ])
-AM_INIT_AUTOMAKE(no-define)
+AM_INIT_AUTOMAKE(no-define,1.11)
+AM_SILENT_RULES([yes])
 AC_CANONICAL_HOST
 
 ##################################################
diff -r adb087007d08 -r 1016349e619f test/compatibility/user-defined-typo.tst
--- a/test/compatibility/user-defined-typo.tst	Mon May 27 09:03:04 2013 +0100
+++ b/test/compatibility/user-defined-typo.tst	Tue May 28 15:17:19 2013 +0100
@@ -1,6 +1,6 @@
 **************** OPTIONS ****************
 -u
-**************** INPUT(gutcheck.typ) ****************
+**************** INPUT(bookloupe.typ) ****************
 arid
 **************** INPUT ****************
 I am the very model of a modern Major-General,
diff -r adb087007d08 -r 1016349e619f test/harness/testcaseparser.c
--- a/test/harness/testcaseparser.c	Mon May 27 09:03:04 2013 +0100
+++ b/test/harness/testcaseparser.c	Tue May 28 15:17:19 2013 +0100
@@ -91,9 +91,12 @@
 {
     TestcaseParser *parser;
     gsize len;
+    GError *err=NULL;
     parser=g_new0(TestcaseParser,1);
-    if (!file_get_contents_text(filename,&parser->contents,&len))
+    if (!file_get_contents_text(filename,&parser->contents,&len,&err))
     {
+	g_printerr("%s: %s\n",filename,err->message);
+	g_error_free(err);
 	g_free(parser);
 	return NULL;
     }