author ali <ali@juiblex.co.uk>

Fri Jan 27 10:30:16 2012 +0000 (2012-01-27)

changeset 5 f600b0d1fc5d

parent 4 218904410231

child 6 faab25d520dd

.hgignore file | annotate | diff | revisions

Makefile.am file | annotate | diff | revisions

README file | annotate | diff | revisions

bl/Makefile.am file | annotate | diff | revisions

bl/bl.h file | annotate | diff | revisions

bl/blstring.c file | annotate | diff | revisions

bl/blstring.h file | annotate | diff | revisions

bl/fileutils.c file | annotate | diff | revisions

bl/fileutils.h file | annotate | diff | revisions

bl/macros.h file | annotate | diff | revisions

bl/mem.c file | annotate | diff | revisions

bl/mem.h file | annotate | diff | revisions

bl/spawn.c file | annotate | diff | revisions

bl/spawn.h file | annotate | diff | revisions

bl/strfuncs.c file | annotate | diff | revisions

bl/strfuncs.h file | annotate | diff | revisions

bl/textfileutils.c file | annotate | diff | revisions

bl/textfileutils.h file | annotate | diff | revisions

bl/types.h file | annotate | diff | revisions

bl/utils.c file | annotate | diff | revisions

bl/utils.h file | annotate | diff | revisions

bookloupe/Makefile.am file | annotate | diff | revisions

bookloupe/bookloupe.c file | annotate | diff | revisions

bookloupe/bookloupe.typ.in file | annotate | diff | revisions

configure.ac file | annotate | diff | revisions

doc/Makefile.am file | annotate | diff | revisions

doc/bookloupe.txt file | annotate | diff | revisions

doc/gc-test.txt file | annotate | diff | revisions

doc/gutcheck.txt file | annotate | diff | revisions

doc/loupe-test.txt file | annotate | diff | revisions

gclib/Makefile.am file | annotate | diff | revisions

gclib/fileutils.c file | annotate | diff | revisions

gclib/fileutils.h file | annotate | diff | revisions

gclib/gclib.h file | annotate | diff | revisions

gclib/gcstring.c file | annotate | diff | revisions

gclib/gcstring.h file | annotate | diff | revisions

gclib/macros.h file | annotate | diff | revisions

gclib/mem.c file | annotate | diff | revisions

gclib/mem.h file | annotate | diff | revisions

gclib/spawn.c file | annotate | diff | revisions

gclib/spawn.h file | annotate | diff | revisions

gclib/strfuncs.c file | annotate | diff | revisions

gclib/strfuncs.h file | annotate | diff | revisions

gclib/textfileutils.c file | annotate | diff | revisions

gclib/textfileutils.h file | annotate | diff | revisions

gclib/types.h file | annotate | diff | revisions

gclib/utils.c file | annotate | diff | revisions

gclib/utils.h file | annotate | diff | revisions

gutcheck/Makefile.am file | annotate | diff | revisions

gutcheck/gutcheck.c file | annotate | diff | revisions

gutcheck/gutcheck.typ.in file | annotate | diff | revisions

test/compatibility/Makefile.am file | annotate | diff | revisions

test/harness/Makefile.am file | annotate | diff | revisions

test/harness/gc-test.c file | annotate | diff | revisions

test/harness/loupe-test.c file | annotate | diff | revisions

test/harness/testcase.c file | annotate | diff | revisions

test/harness/testcaseio.c file | annotate | diff | revisions

test/harness/testcaseparser.c file | annotate | diff | revisions

test/harness/testcaseparser.h file | annotate | diff | revisions
     1.1 --- a/.hgignore	Fri Jan 27 00:28:11 2012 +0000
     1.2 +++ b/.hgignore	Fri Jan 27 10:30:16 2012 +0000
     1.3 @@ -1,5 +1,5 @@
     1.4 -gutcheck-.*\.tar\.gz
     1.5 -gutcheck-.*/
     1.6 +bookloupe-.*\.tar\.gz
     1.7 +bookloupe-.*/
     1.8  Makefile$
     1.9  Makefile\.in
    1.10  aclocal\.m4
    1.11 @@ -17,6 +17,6 @@
    1.12  .*\.la
    1.13  .*\.lo
    1.14  .*\.exe
    1.15 -gutcheck/gutcheck\.typ
    1.16 -gutcheck/gutcheck
    1.17 -test/harness/gc-test
    1.18 +bookloupe/bookloupe\.typ
    1.19 +bookloupe/bookloupe
    1.20 +test/harness/loupe-test

     2.1 --- a/Makefile.am	Fri Jan 27 00:28:11 2012 +0000
     2.2 +++ b/Makefile.am	Fri Jan 27 10:30:16 2012 +0000
     2.3 @@ -1,1 +1,1 @@
     2.4 -SUBDIRS=gclib gutcheck test doc
     2.5 +SUBDIRS=bl bookloupe test doc

     3.1 --- a/README	Fri Jan 27 00:28:11 2012 +0000
     3.2 +++ b/README	Fri Jan 27 10:30:16 2012 +0000
     3.3 @@ -1,10 +1,10 @@
     3.4 -                                   gutcheck
     3.5 -                                   ========
     3.6 +                                   bookloupe
     3.7 +                                   =========
     3.8  
     3.9  General installation instructions can be found in INSTALL. The following
    3.10  aim to give a quick overview and some help for specific systems. Documentation
    3.11 -for gutcheck itself can be found in doc/gutcheck.txt and for the test
    3.12 -framework in doc/gc-test.txt.
    3.13 +for bookloupe itself can be found in doc/bookloupe.txt and for the test
    3.14 +framework in doc/loupe-test.txt.
    3.15  
    3.16  Linux
    3.17  -----
    3.18 @@ -43,12 +43,12 @@
    3.19  % sudo yum install mingw32-gcc pkgconfig mingw32-glib2-static \
    3.20    mingw32-gettext-static mingw32-iconv-static
    3.21  % ./configure --host=i686-w64-mingw32 --disable-shared \
    3.22 -  --bindir=/gutcheck --datadir=/
    3.23 +  --bindir=/bookloupe --datadir=/
    3.24  % make
    3.25  % mkdir build
    3.26  % make install DESTDIR=`pwd`/build
    3.27  
    3.28 -The contents of the build/gutcheck directory can then be copied to a
    3.29 +The contents of the build/bookloupe directory can then be copied to a
    3.30  Microsoft Windows machine.
    3.31  
    3.32  Depending on the version of mingw32-gcc you use, you may need to specify a

     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/bl/Makefile.am	Fri Jan 27 10:30:16 2012 +0000
     4.3 @@ -0,0 +1,10 @@
     4.4 +INCLUDES=-I$(top_srcdir)
     4.5 +AM_CFLAGS=$(GLIB_CFLAGS)
     4.6 +LIBS=$(GLIB_LIBS)
     4.7 +
     4.8 +noinst_LTLIBRARIES=libbl.la
     4.9 +libbl_la_SOURCES=bl.h textfileutils.c textfileutils.h spawn.c spawn.h
    4.10 +if !HAVE_GLIB
    4.11 +libbl_la_SOURCES+=macros.h types.h fileutils.c fileutils.h mem.c mem.h \
    4.12 +  strfuncs.c strfuncs.h blstring.c blstring.h utils.c utils.h
    4.13 +endif

     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/bl/bl.h	Fri Jan 27 10:30:16 2012 +0000
     5.3 @@ -0,0 +1,36 @@
     5.4 +#if HAVE_GLIB
     5.5 +
     5.6 +#include <glib.h>
     5.7 +#define BL_DIR_SEPARATOR G_DIR_SEPARATOR
     5.8 +#define BL_DIR_SEPARATOR_S G_DIR_SEPARATOR_S
     5.9 +#define BL_IS_DIR_SEPARATOR(c) G_IS_DIR_SEPARATOR(c)
    5.10 +#define boolean gboolean
    5.11 +#define String GString
    5.12 +#define mem_new0 g_new0
    5.13 +#define mem_free g_free
    5.14 +#define str_dup g_strdup
    5.15 +#define str_ndup g_strndup
    5.16 +#define path_get_basename g_path_get_basename
    5.17 +#define file_get_contents(filename,contents,length) \
    5.18 +  g_file_get_contents(filename,contents,length,NULL)
    5.19 +#define string_new g_string_new
    5.20 +#define string_append g_string_append
    5.21 +#define string_append_len g_string_append_len
    5.22 +#define string_append_c g_string_append_c
    5.23 +#define string_free g_string_free
    5.24 +#define string_set_size g_string_set_size
    5.25 +
    5.26 +#else	/* !HAVE_GLIB */
    5.27 +
    5.28 +#include <bl/macros.h>
    5.29 +#include <bl/types.h>
    5.30 +#include <bl/mem.h>
    5.31 +#include <bl/fileutils.h>
    5.32 +#include <bl/strfuncs.h>
    5.33 +#include <bl/blstring.h>
    5.34 +#include <bl/utils.h>
    5.35 +
    5.36 +#endif	/* HAVE_GLIB */
    5.37 +
    5.38 +#include <bl/textfileutils.h>
    5.39 +#include <bl/spawn.h>

     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/bl/blstring.c	Fri Jan 27 10:30:16 2012 +0000
     6.3 @@ -0,0 +1,90 @@
     6.4 +#include <stdlib.h>
     6.5 +#include <string.h>
     6.6 +#include <bl/blstring.h>
     6.7 +#include <bl/types.h>
     6.8 +#include <bl/mem.h>
     6.9 +#include <bl/strfuncs.h>
    6.10 +
    6.11 +/*
    6.12 + * Strings which manage their own memory
    6.13 + */
    6.14 +
    6.15 +String *string_new(const char *init)
    6.16 +{
    6.17 +    String *string=mem_new(String,1);
    6.18 +    if (!init)
    6.19 +	init="";
    6.20 +    string->len=strlen(init);
    6.21 +    string->alloc=string->len+1;
    6.22 +    string->str=str_dup(init);
    6.23 +    return string;
    6.24 +}
    6.25 +
    6.26 +/*
    6.27 + * Free a string and either return the contents (if free_segment is FALSE)
    6.28 + * or free the contents as well and return NULL (if free_segment is TRUE).
    6.29 + */
    6.30 +char *string_free(String *string,boolean free_segment)
    6.31 +{
    6.32 +    char *retval;
    6.33 +    if (free_segment)
    6.34 +    {
    6.35 +	mem_free(string->str);
    6.36 +	retval=NULL;
    6.37 +    }
    6.38 +    else
    6.39 +	retval=string->str;
    6.40 +    mem_free(string);
    6.41 +    return retval;
    6.42 +}
    6.43 +
    6.44 +/*
    6.45 + * Append a byte to string.
    6.46 + */
    6.47 +void string_append_c(String *string,char c)
    6.48 +{
    6.49 +    if (string->len+1==string->alloc)
    6.50 +    {
    6.51 +	string->alloc*=2;
    6.52 +	string->str=mem_renew(char,string->str,string->alloc);
    6.53 +    }
    6.54 +    string->str[string->len++]=c;
    6.55 +    string->str[string->len]='\0';
    6.56 +}
    6.57 +
    6.58 +/*
    6.59 + * Append len bytes from s to string. len may be passed as <0 if s is
    6.60 + * a nul-terminated string of unknown length.
    6.61 + */
    6.62 +void string_append_len(String *string,const char *s,ssize_t len)
    6.63 +{
    6.64 +    if (len<0)
    6.65 +	len=strlen(s);
    6.66 +    if (string->len+len>=string->alloc)
    6.67 +    {
    6.68 +	while (string->len+len>=string->alloc)
    6.69 +	    string->alloc*=2;
    6.70 +	string->str=mem_renew(char,string->str,string->alloc);
    6.71 +    }
    6.72 +    memcpy(string->str+string->len,s,len);
    6.73 +    string->len+=len;
    6.74 +    string->str[string->len]='\0';
    6.75 +}
    6.76 +
    6.77 +/*
    6.78 + * Sets the length of a String. If the length is less than the current length,
    6.79 + * the string will be truncated. If the length is greater than the current
    6.80 + * length, the contents of the newly added area are undefined. (However, as
    6.81 + * always, string->str[string->len] will be a nul byte.)
    6.82 + */
    6.83 +void string_set_size(String *string,size_t len)
    6.84 +{
    6.85 +    if (len>=string->alloc)
    6.86 +    {
    6.87 +	while (len>=string->alloc)
    6.88 +	    string->alloc*=2;
    6.89 +	string->str=mem_renew(char,string->str,string->alloc);
    6.90 +    }
    6.91 +    string->len=len;
    6.92 +    string->str[string->len]='\0';
    6.93 +}

     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/bl/blstring.h	Fri Jan 27 10:30:16 2012 +0000
     7.3 @@ -0,0 +1,18 @@
     7.4 +#ifndef BL_STRING_H
     7.5 +#define BL_STRING_H
     7.6 +
     7.7 +#include <unistd.h>
     7.8 +#include <bl/types.h>
     7.9 +
    7.10 +typedef struct {
    7.11 +    char *str;		/* contents; str[len] is always a nul byte */
    7.12 +    size_t alloc,len;	/* bytes allocated; bytes in use, excluding the nul */
    7.13 +} String;
    7.14 +
    7.15 +String *string_new(const char *init);
    7.16 +char *string_free(String *string,boolean free_segment);
    7.17 +void string_append_c(String *string,char c);
    7.18 +void string_append_len(String *string,const char *s,ssize_t len);
    7.19 +void string_set_size(String *string,size_t len);
    7.20 +#define string_append(string,s)		string_append_len(string,s,-1)
    7.21 +#endif /* BL_STRING_H */

     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/bl/fileutils.c	Fri Jan 27 10:30:16 2012 +0000
     8.3 @@ -0,0 +1,46 @@
     8.4 +#include <stdlib.h>
     8.5 +#include <stdio.h>
     8.6 +#include <bl/macros.h>
     8.7 +#include <bl/mem.h>
     8.8 +#include <bl/fileutils.h>
     8.9 +#include <bl/blstring.h>
    8.10 +
    8.11 +/*
    8.12 + * Read a whole file into *contents (nul-terminated; release with mem_free)
    8.13 + * and, if length is non-NULL, store its size in bytes there. Returns FALSE
    8.14 + * on error after writing a suitable message to stderr.
    8.15 + */
    8.16 +boolean file_get_contents(const char *filename,char **contents,size_t *length)
    8.17 +{
    8.18 +    FILE *fp;
    8.19 +    size_t n;
    8.20 +    char *buffer;
    8.21 +    String *string;
    8.22 +    fp=fopen(filename,"rb");
    8.23 +    if (!fp)
    8.24 +    {
    8.25 +	perror(filename);
    8.26 +	return FALSE;
    8.27 +    }
    8.28 +    buffer=mem_new(char,1024);
    8.29 +    string=string_new(NULL);
    8.30 +    do
    8.31 +    {
    8.32 +	n=fread(buffer,1,1024,fp);
    8.33 +	if (ferror(fp))	/* n is unsigned, so test the stream instead */
    8.34 +	{
    8.35 +	    perror(filename);
    8.36 +	    string_free(string,TRUE);
    8.37 +	    mem_free(buffer);
    8.38 +	    fclose(fp);	/* a FILE * must be closed, not passed to free() */
    8.39 +	    return FALSE;
    8.40 +	}
    8.41 +	string_append_len(string,buffer,n);
    8.42 +    } while(n);
    8.43 +    mem_free(buffer);
    8.44 +    if (length)
    8.45 +	*length=string->len;
    8.46 +    *contents=string_free(string,FALSE);
    8.47 +    fclose(fp);
    8.48 +    return TRUE;
    8.49 +}

     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/bl/fileutils.h	Fri Jan 27 10:30:16 2012 +0000
     9.3 @@ -0,0 +1,8 @@
     9.4 +#ifndef BL_FILEUTILS_H
     9.5 +#define BL_FILEUTILS_H
     9.6 +
     9.7 +#include <bl/types.h>
     9.8 +
     9.9 +boolean file_get_contents(const char *filename,char **contents,size_t *length);
    9.10 +
    9.11 +#endif /* BL_FILEUTILS_H */

    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/bl/macros.h	Fri Jan 27 10:30:16 2012 +0000
    10.3 @@ -0,0 +1,7 @@
    10.4 +#ifndef FALSE
    10.5 +#define FALSE	0
    10.6 +#endif
    10.7 +
    10.8 +#ifndef TRUE
    10.9 +#define TRUE	(!FALSE)
   10.10 +#endif

    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/bl/mem.c	Fri Jan 27 10:30:16 2012 +0000
    11.3 @@ -0,0 +1,54 @@
    11.4 +#include <stdlib.h>
    11.5 +#include <stdio.h>
    11.6 +#include <string.h>
    11.7 +#include <bl/mem.h>
    11.8 +
    11.9 +/*
   11.10 + * A memory allocator that aborts on failure (so that the caller never
   11.11 + * needs to handle out of memory, which we assume is very unlikely to
   11.12 + * happen under normal circumstances on any modern machine).
   11.13 + */
   11.14 +void *mem_alloc(size_t nmemb,size_t size)
   11.15 +{
   11.16 +    void *ptr=malloc(nmemb*size);
   11.17 +    if (!ptr)
   11.18 +    {
   11.19 +	fprintf(stderr,
   11.20 +	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
   11.21 +	  (unsigned long)nmemb,(unsigned long)size);
   11.22 +	abort();
   11.23 +    }
   11.24 +    return ptr;
   11.25 +}
   11.26 +
   11.27 +/*
   11.28 + * As mem_alloc, but new memory is cleared to zero.
   11.29 + */
   11.30 +void *mem_alloc0(size_t nmemb,size_t size)
   11.31 +{
   11.32 +    void *ptr=calloc(nmemb,size);
   11.33 +    if (!ptr)
   11.34 +    {
   11.35 +	fprintf(stderr,
   11.36 +	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
   11.37 +	  (unsigned long)nmemb,(unsigned long)size);
   11.38 +	abort();
   11.39 +    }
   11.40 +    return ptr;
   11.41 +}
   11.42 +
   11.43 +/*
   11.44 + * Grow or shrink a memory block, aborting on failure.
   11.45 + */
   11.46 +void *mem_realloc(void *ptr,size_t nmemb,size_t size)
   11.47 +{
   11.48 +    ptr=realloc(ptr,nmemb*size);
   11.49 +    if (!ptr)
   11.50 +    {
   11.51 +	fprintf(stderr,
   11.52 +	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
   11.53 +	  (unsigned long)nmemb,(unsigned long)size);
   11.54 +	abort();
   11.55 +    }
   11.56 +    return ptr;
   11.57 +}

    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/bl/mem.h	Fri Jan 27 10:30:16 2012 +0000
    12.3 @@ -0,0 +1,13 @@
    12.4 +#ifndef BL_MEM_H
    12.5 +#define BL_MEM_H
    12.6 +
    12.7 +void *mem_alloc(size_t nmemb,size_t size);
    12.8 +void *mem_alloc0(size_t nmemb,size_t size);
    12.9 +void *mem_realloc(void *ptr,size_t nmemb,size_t size);
   12.10 +
   12.11 +#define mem_new(type,n)		((type *)mem_alloc(n,sizeof(type)))
   12.12 +#define mem_new0(type,n)	((type *)mem_alloc0(n,sizeof(type)))
   12.13 +#define mem_renew(type,ptr,n)	((type *)mem_realloc(ptr,n,sizeof(type)))
   12.14 +#define mem_free(ptr)		free(ptr)
   12.15 +
   12.16 +#endif /* BL_MEM_H */

    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/bl/spawn.c	Fri Jan 27 10:30:16 2012 +0000
    13.3 @@ -0,0 +1,84 @@
    13.4 +#include <stdlib.h>
    13.5 +#include <stdio.h>
    13.6 +#ifndef WIN32
    13.7 +#include <sys/wait.h>
    13.8 +#endif
    13.9 +#include <bl/bl.h>
   13.10 +
   13.11 +#define SPAWN_BUFSIZE	128
   13.12 +
   13.13 +/*
   13.14 + * Run argv[] and wait for it to finish. Either output parameter may be NULL;
   13.15 + * captured output should be freed with mem_free. Returns FALSE on error.
   13.16 + */
   13.17 +boolean spawn_sync(char **argv,char **standard_output,int *exit_status)
   13.18 +{
   13.19 +/* Don't use g_spawn_sync on WIN32 for now to avoid needing the helper */
   13.20 +#if HAVE_GLIB && !defined(WIN32)
   13.21 +    char *standard_error=NULL;	/* stays NULL if the spawn itself fails */
   13.22 +    GError *error=NULL;
   13.23 +    gboolean retval;
   13.24 +    GSpawnFlags flags=G_SPAWN_SEARCH_PATH;
   13.25 +    if (!standard_output)
   13.26 +	flags|=G_SPAWN_STDOUT_TO_DEV_NULL;	/* keep G_SPAWN_SEARCH_PATH set */
   13.27 +    retval=g_spawn_sync(NULL,argv,NULL,flags,NULL,NULL,standard_output,
   13.28 +      &standard_error,exit_status,&error);
   13.29 +    fputs(standard_error?standard_error:"",stderr);
   13.30 +    g_free(standard_error);
   13.31 +    if (!retval)
   13.32 +    {
   13.33 +	fprintf(stderr,"%s\n",error->message);
   13.34 +	g_error_free(error);
   13.35 +    }
   13.36 +    else if (exit_status)
   13.37 +	*exit_status=WEXITSTATUS(*exit_status);
   13.38 +    return retval;
   13.39 +#else
   13.40 +    FILE *fp;
   13.41 +    int i,r;
   13.42 +    size_t n,len;
   13.43 +    String *command_line,*string;
   13.44 +    command_line=string_new(NULL);
   13.45 +    for(i=0;argv[i];i++)
   13.46 +    {
   13.47 +	if (i)
   13.48 +	    string_append_c(command_line,' ');
   13.49 +	string_append(command_line,argv[i]);
   13.50 +    }
   13.51 +    fp=popen(command_line->str,"r");
   13.52 +    if (!fp)
   13.53 +	perror(command_line->str);	/* must precede freeing command_line */
   13.54 +    string_free(command_line,TRUE);
   13.55 +    if (!fp)
   13.56 +	return FALSE;
   13.57 +    string=string_new(NULL);
   13.58 +    do
   13.59 +    {
   13.60 +	len=string->len;
   13.61 +	string_set_size(string,len+SPAWN_BUFSIZE);
   13.62 +	n=fread(string->str+len,1,SPAWN_BUFSIZE,fp);
   13.63 +	if (ferror(fp))	/* n is unsigned, so test the stream instead */
   13.64 +	{
   13.65 +	    perror("fread");
   13.66 +	    (void)pclose(fp);
   13.67 +	    string_free(string,TRUE);
   13.68 +	    return FALSE;
   13.69 +	}
   13.70 +	string_set_size(string,len+n);
   13.71 +    } while(n);
   13.72 +    r=pclose(fp);
   13.73 +    if (r<0)
   13.74 +    {
   13.75 +	perror("pclose");
   13.76 +	string_free(string,TRUE);
   13.77 +	return FALSE;
   13.78 +    }
   13.79 +    if (exit_status)
   13.80 +	*exit_status=r;
   13.81 +    if (standard_output)
   13.82 +	*standard_output=string_free(string,FALSE);
   13.83 +    else
   13.84 +	string_free(string,TRUE);
   13.85 +    return TRUE;
   13.86 +#endif
   13.87 +}

    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/bl/spawn.h	Fri Jan 27 10:30:16 2012 +0000
    14.3 @@ -0,0 +1,8 @@
    14.4 +#ifndef BL_SPAWN_H
    14.5 +#define BL_SPAWN_H
    14.6 +
    14.7 +#include <bl/bl.h>
    14.8 +
    14.9 +boolean spawn_sync(char **argv,char **standard_output,int *exit_status);
   14.10 +
   14.11 +#endif /* BL_SPAWN_H */

    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/bl/strfuncs.c	Fri Jan 27 10:30:16 2012 +0000
    15.3 @@ -0,0 +1,26 @@
    15.4 +#include <stdlib.h>
    15.5 +#include <string.h>
    15.6 +#include <bl/mem.h>
    15.7 +#include <bl/strfuncs.h>
    15.8 +
    15.9 +/*
   15.10 + * Like strndup, but only returns NULL if str is NULL.
   15.11 + * Note that this routine copies n bytes rather than n characters.
   15.12 + */
   15.13 +char *str_ndup(const char *str,size_t n)
   15.14 +{
   15.15 +    char *dup;
   15.16 +    if (!str)
   15.17 +	return NULL;
   15.18 +    dup=mem_alloc0(n+1,1);
   15.19 +    strncpy(dup,str,n);
   15.20 +    return dup;
   15.21 +}
   15.22 +
   15.23 +/*
   15.24 + * Like strdup, but only returns NULL if str is NULL.
   15.25 + */
   15.26 +char *str_dup(const char *str)
   15.27 +{
   15.28 +    return str_ndup(str,strlen(str));
   15.29 +}

    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/bl/strfuncs.h	Fri Jan 27 10:30:16 2012 +0000
    16.3 @@ -0,0 +1,7 @@
    16.4 +#ifndef BL_STRFUNCS_H
    16.5 +#define BL_STRFUNCS_H
    16.6 +
    16.7 +char *str_dup(const char *str);
    16.8 +char *str_ndup(const char *str,size_t n);
    16.9 +
   16.10 +#endif /* BL_STRFUNCS_H */

    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/bl/textfileutils.c	Fri Jan 27 10:30:16 2012 +0000
    17.3 @@ -0,0 +1,33 @@
    17.4 +#include <stdlib.h>
    17.5 +#include <stdio.h>
    17.6 +#include <bl/bl.h>
    17.7 +
    17.8 +/*
    17.9 + * Read a file into memory (which should be freed with mem_free when no
   17.10 + * longer required). Returns FALSE on error and outputs a suitable error
   17.11 + * message to stderr.
   17.12 + * Every '\r' byte is dropped, so DOS-style line endings are handled
   17.13 + * transparently even on platforms which don't normally use this format.
   17.14 + */
   17.15 +boolean file_get_contents_text(const char *filename,char **contents,
   17.16 +  size_t *length)
   17.17 +{
   17.18 +    size_t i;	/* same type as raw_length, so large files cannot overflow it */
   17.19 +    char *raw;
   17.20 +    size_t raw_length;
   17.21 +    String *string;
   17.22 +    if (!file_get_contents(filename,&raw,&raw_length))
   17.23 +	return FALSE;
   17.24 +    string=string_new(NULL);
   17.25 +    for(i=0;i<raw_length;i++)
   17.26 +	if (raw[i]!='\r')
   17.27 +	    string_append_c(string,raw[i]);
   17.28 +    mem_free(raw);
   17.29 +    if (length)
   17.30 +	*length=string->len;
   17.31 +    if (contents)
   17.32 +	*contents=string_free(string,FALSE);
   17.33 +    else
   17.34 +	string_free(string,TRUE);
   17.35 +    return TRUE;
   17.36 +}

    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/bl/textfileutils.h	Fri Jan 27 10:30:16 2012 +0000
    18.3 @@ -0,0 +1,9 @@
    18.4 +#ifndef BL_TEXTFILEUTILS_H
    18.5 +#define BL_TEXTFILEUTILS_H
    18.6 +
    18.7 +#include <bl/bl.h>
    18.8 +
    18.9 +boolean file_get_contents_text(const char *filename,char **contents,
   18.10 +  size_t *length);
   18.11 +
   18.12 +#endif /* BL_TEXTFILEUTILS_H */

    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/bl/types.h	Fri Jan 27 10:30:16 2012 +0000
    19.3 @@ -0,0 +1,6 @@
    19.4 +#ifndef BL_TYPES_H
    19.5 +#define BL_TYPES_H
    19.6 +
    19.7 +typedef int boolean;
    19.8 +
    19.9 +#endif	/* BL_TYPES_H */

    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/bl/utils.c	Fri Jan 27 10:30:16 2012 +0000
    20.3 @@ -0,0 +1,46 @@
    20.4 +#include <stdlib.h>
    20.5 +#include <string.h>
    20.6 +#include <unistd.h>
    20.7 +#include <bl/mem.h>
    20.8 +#include <bl/strfuncs.h>
    20.9 +#include <bl/utils.h>
   20.10 +
   20.11 +#define is_valid_drive(d)	((d)>='a' && (d)<='z' || (d)>='A' && (d)<='Z')
   20.12 +
   20.13 +/*
   20.14 + * Gets the last component of the filename. If filename ends with a directory
   20.15 + * separator it gets the component before the last slash. If filename consists
   20.16 + * only of directory separators (and on Windows, possibly a drive letter), a
   20.17 + * single separator is returned. If filename is empty, it gets ".".
   20.18 + */
   20.19 +char *path_get_basename(const char *filename)
   20.20 +{
   20.21 +    ssize_t base,last_nonslash;
   20.22 +    size_t len;
   20.23 +    char *retval;
   20.24 +    if (*filename=='\0')
   20.25 +        return str_dup(".");
   20.26 +    last_nonslash=strlen(filename)-1;
   20.27 +    while (last_nonslash>=0 && BL_IS_DIR_SEPARATOR(filename[last_nonslash]))
   20.28 +	last_nonslash--;
   20.29 +    if (last_nonslash<0)
   20.30 +	/* string only containing slashes */
   20.31 +    return str_dup(BL_DIR_SEPARATOR_S);
   20.32 +#ifdef WIN32
   20.33 +    if (last_nonslash==1 && is_valid_drive(filename[0]) && filename[1]==':')
   20.34 +	/* string only containing slashes and a drive */
   20.35 +	return str_dup(BL_DIR_SEPARATOR_S);
   20.36 +#endif
   20.37 +    base=last_nonslash;
   20.38 +    while (base>=0 && !BL_IS_DIR_SEPARATOR(filename[base]))
   20.39 +	base--;
   20.40 +#ifdef WIN32
   20.41 +    if (base==-1 && is_valid_drive(filename[0]) && filename[1] == ':')
   20.42 +	  base=1;
   20.43 +#endif
   20.44 +    len=last_nonslash-base;
   20.45 +    retval=mem_alloc(len+1,1);
   20.46 +    memcpy(retval,filename+base+1,len);
   20.47 +    retval[len]='\0';
   20.48 +    return retval;
   20.49 +}

    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/bl/utils.h	Fri Jan 27 10:30:16 2012 +0000
    21.3 @@ -0,0 +1,16 @@
    21.4 +#ifndef BL_UTIL_H
    21.5 +#define BL_UTIL_H
    21.6 +
    21.7 +#ifdef WIN32
    21.8 +#define BL_DIR_SEPARATOR '\\'
    21.9 +#define BL_DIR_SEPARATOR_S "\\"
   21.10 +#define BL_IS_DIR_SEPARATOR(c) ((c)==BL_DIR_SEPARATOR || (c)=='/')
   21.11 +#else
   21.12 +#define BL_DIR_SEPARATOR '/'
   21.13 +#define BL_DIR_SEPARATOR_S "/"
   21.14 +#define BL_IS_DIR_SEPARATOR(c) ((c)==BL_DIR_SEPARATOR)
   21.15 +#endif
   21.16 +
   21.17 +char *path_get_basename(const char *filename);
   21.18 +
   21.19 +#endif /* BL_UTIL_H */

    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/bookloupe/Makefile.am	Fri Jan 27 10:30:16 2012 +0000
    22.3 @@ -0,0 +1,8 @@
    22.4 +bin_PROGRAMS=bookloupe
    22.5 +pkgdata_DATA=bookloupe.typ
    22.6 +
    22.7 +bookloupe.typ:	bookloupe.typ.in
    22.8 +	sed 's/$$/\r/' $< > $@
    22.9 +
   22.10 +EXTRA_DIST=bookloupe.typ.in
   22.11 +CLEANFILES=bookloupe.typ

    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/bookloupe/bookloupe.c	Fri Jan 27 10:30:16 2012 +0000
    23.3 @@ -0,0 +1,2982 @@
    23.4 +/*************************************************************************/
    23.5 +/* gutcheck - check for assorted weirdnesses in a PG candidate text file */
    23.6 +/*                                                                       */
    23.7 +/* Version 0.991                                                         */
    23.8 +/* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>                  */
    23.9 +/*                                                                       */
   23.10 +/* This program is free software; you can redistribute it and/or modify  */
   23.11 +/* it under the terms of the GNU General Public License as published by  */
   23.12 +/* the Free Software Foundation; either version 2 of the License, or     */
   23.13 +/* (at your option) any later version.                                   */
   23.14 +/*                                                                       */
   23.15 +/* This program is distributed in the hope that it will be useful,       */
   23.16 +/* but WITHOUT ANY WARRANTY; without even the implied warranty of        */
   23.17 +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         */
   23.18 +/* GNU General Public License for more details.                          */
   23.19 +/*                                                                       */
   23.20 +/* You should have received a copy of the GNU General Public License     */
   23.21 +/* along with this program; if not, write to the                         */
   23.22 +/*      Free Software Foundation, Inc.,                                  */
   23.23 +/*      59 Temple Place,                                                 */
   23.24 +/*      Suite 330,                                                       */
   23.25 +/*      Boston, MA  02111-1307  USA                                      */
   23.26 +/*                                                                       */
   23.27 +/*                                                                       */
   23.28 +/*                                                                       */
   23.29 +/* Overview comments:                                                    */
   23.30 +/*                                                                       */
   23.31 +/* If you're reading this, you're either interested in how to detect     */
   23.32 +/* formatting errors, or very very bored.                                */
   23.33 +/*                                                                       */
   23.34 +/* Gutcheck is a homebrew formatting checker specifically for            */
   23.35 +/* spotting common formatting problems in a PG e-text. I typically       */
   23.36 +/* run it once or twice on a file I'm about to submit; it usually        */
   23.37 +/* finds a few formatting problems. It also usually finds lots of        */
   23.38 +/* queries that aren't problems at all; it _really_ doesn't like         */
   23.39 +/* the standard PG header, for example.  It's optimized for straight     */
   23.40 +/* prose; poetry and non-fiction involving tables tend to trigger        */
   23.41 +/* false alarms.                                                         */
   23.42 +/*                                                                       */
   23.43 +/* The code of gutcheck is not very interesting, but the experience      */
   23.44 +/* of what constitutes a possible error may be, and the best way to      */
   23.45 +/* illustrate that is by example.                                        */
   23.46 +/*                                                                       */
   23.47 +/*                                                                       */
   23.48 +/* Here are some common typos found in PG texts that gutcheck            */
   23.49 +/* will flag as errors:                                                  */
   23.50 +/*                                                                       */
   23.51 +/* "Look!John , over there!"                                             */
   23.52 +/* <this is a HTML tag>                                                  */
   23.53 +/* &so is this;                                                          */
   23.54 +/* Margaret said: " Now you should start for school."                    */
   23.55 +/* Margaret said: "Now you should start for school. (if end of para)     */
   23.56 +/* The horse is said to he worth a lot.                                  */
   23.57 +/* 0K - this'11 make you look close1y.                                   */
   23.58 +/* "If you do. you'll regret it!"                                        */
   23.59 +/*                                                                       */
   23.60 +/* There are some complications . The extra space left around that       */
   23.61 +/* period was an error . . . but that ellipsis wasn't.                   */
   23.62 +/*                                                                       */
   23.63 +/* The last line of a paragraph                                          */
   23.64 +/* is usually short.                                                     */
   23.65 +/*                                                                       */
   23.66 +/* This period is an error.But the periods in a.m. aren't.               */
   23.67 +/*                                                                       */
   23.68 +/* Checks that are do-able but not (well) implemented are:               */
   23.69 +/*        Single-quote checking.                                         */
   23.70 +/*          Despite 3 attempts at it, singlequote checking is still      */
   23.71 +/*          crap in gutcheck. It may not be possible without analysis    */
   23.72 +/*          of the whole paragraph.                                      */
   23.73 +/*                                                                       */
   23.74 +/*************************************************************************/
   23.75 +
   23.76 +
   23.77 +#include <stdio.h>
   23.78 +#include <stdlib.h>
   23.79 +#include <string.h>
   23.80 +#include <ctype.h>
   23.81 +
   23.82 +#define MAXWORDLEN    80    /* max length of one word             */
   23.83 +#define LINEBUFSIZE 2048    /* buffer size for an input line      */
   23.84 +
   23.85 +#define MAX_USER_TYPOS 1000            /* capacity of usertypo[]; further entries in the file are ignored */
   23.86 +#define USERTYPO_FILE "gutcheck.typ"   /* user typo list: tried in the cwd first, then in the executable's directory */
   23.87 +
   23.88 +#ifndef MAX_PATH
   23.89 +#define MAX_PATH 16384                 /* fallback path-buffer size when MAX_PATH is not already defined */
   23.90 +#endif
   23.91 +
   23.92 +char aline[LINEBUFSIZE];      /* line buffer; main() reads the user typo file into it one line at a time */
   23.93 +char prevline[LINEBUFSIZE];   /* presumably the previously read line -- TODO confirm against procfile() */
   23.94 +
   23.95 +                 /* Common typos and scannos, all lower case; the empty string ends the list. */
   23.96 +char *typo[] = { "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane", "nad",
   23.97 +                "te", "ig", "acn",  "ahve", "alot", "anbd", "andt", "awya", "aywa", "bakc", "om",
   23.98 +                "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt", "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr",
   23.99 +                "hmi", "hse", "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd", "gerat", "goign",
  23.100 +                "gruop", "haev", "hda", "hearign", "seeign", "sayign", "herat", "hge", "hsa", "hsi", "hte", "htere",
  23.101 +                "htese", "htey", "htis", "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut", "loev",
  23.102 +                "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter", "omre", "onyl", "otehr", "otu", "owrk",
  23.103 +                "owuld", "peice", "peices", "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",
  23.104 +                "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe", "sohw", "stnad", "stopry",
  23.105 +                "stoyr", "stpo", "tahn", "taht", "tath", "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge",
  23.106 +                "thier", "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne", "tirne", "tkae",
  23.107 +                "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey", "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih",
  23.108 +                "whihc", "whta", "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",
  23.109 +                "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking", "wtih", "wuould", "wya", "yera",
  23.110 +                "yeras", "yersa", "yoiu", "youve", "ytou", "yuor",
  23.111 +                /* added words with h and b swapped (e.g. "abead", "ahle") for version 12 - removed a few with "tbe" v.25 */
  23.112 +                "abead", "ahle", "ahout", "ahove", "altbough", "balf", "bardly", "bas", "bave", "baving", "bebind", 
  23.113 +                "beld", "belp", "belped", "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge", "dehates", 
  23.114 +                "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan", "hegin", "heing", 
  23.115 +                "helieve", "henefit", "hetter", "hetween", "heyond", "hig", "higber", "huild", "huy", "hy", "jobn", "joh", 
  23.116 +                "meanwbile", "memher", "memhers", "numher", "numhers", 
  23.117 +                "perbaps", "prohlem", "puhlic", "witbout", 
  23.118 +                /* and a few more for .18 */
  23.119 +                "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud", "prornise", "prornised", "modem", "bo",
  23.120 +                "heside", "chapteb", "chaptee", "se",
  23.121 +                 ""};   /* list terminator */
  23.122 +
  23.123 +char *usertypo[MAX_USER_TYPOS];   /* malloc'd copies of user typo file lines, filled by main(); first usertypo_count slots are valid */
  23.124 +
  23.125 +                 /* Common abbreviations and other OK words not to query as typos. */
  23.126 +                 /* All lower case; the empty string ends the list.                */
  23.127 +char *okword[] = {"mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm", "rd", "sh", "br",
  23.128 +                  "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "pompeii","hawaii","hawaiian",
  23.129 +                  "hotbed", "heartbeat", "heartbeats", "outbid", "outbids", "frostbite", "frostbitten",
  23.130 +                  ""};   /* list terminator ("ms" was removed from this list in 0.99) */
  23.131 +
  23.132 +                 /* Common abbreviations that cause otherwise unexplained periods. */
  23.133 +char *abbrev[] = {"cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op", "cit",
  23.134 +                  "deg", "min", "chap", "oz", "mme", "mlle", "mssrs",
  23.135 +                  ""};   /* list terminator; entries are lower case and carry no trailing period */
  23.136 +                 /* Two-Letter combinations that rarely if ever start words, */
  23.137 +                 /* but are common scannos or otherwise common letter        */
  23.138 +                 /* combinations.                                            */
  23.139 +char *nostart[] = { "hr", "hl", "cb", "sb", "tb", "wb", "tl",
  23.140 +                    "tn", "rn", "lt", "tj",
  23.141 +                    "" };   /* list terminator */
  23.142 +
  23.143 +                 /* Two-Letter combinations that rarely if ever end words    */
  23.144 +                 /* but are common scannos or otherwise common letter        */
  23.145 +                 /* combinations                                             */
  23.146 +char *noend[]   = { "cb", "gb", "pb", "sb", "tb", 
  23.147 +                    "wh","fr","br","qu","tw","gl","fl","sw","gr","sl","cl",
  23.148 +                    "iy",
  23.149 +                    ""};   /* list terminator */
  23.150 +
  23.151 +char *markup[]  = { "a", "b", "big", "blockquote", "body", "br", "center",   /* HTML element names, lower case, without angle brackets */
  23.152 +                    "col", "div", "em", "font", "h1", "h2", "h3", "h4", 
  23.153 +                    "h5", "h6", "head", "hr", "html", "i", "img", "li", 
  23.154 +                    "meta", "ol", "p", "pre", "small", "span", "strong", 
  23.155 +                    "sub", "sup", "table", "td", "tfoot", "thead", "title", 
  23.156 +                    "tr", "tt", "u", "ul", 
  23.157 +                    ""};   /* list terminator */
  23.158 +
  23.159 +char *DPmarkup[] = { "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>",   /* literal DP markup tokens; presumably what the D switch ignores -- TODO confirm */
  23.160 +                    ""}; /* list terminator; <tb> added .991 */
  23.161 +
  23.162 +char *nocomma[]  = { "the", "it's", "their", "an", "mrs", "a", "our", "that's",   /* presumably words after which a comma is suspect -- TODO confirm against use */
  23.163 +                     "its", "whose", "every", "i'll", "your", "my", 
  23.164 +                     "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "rd",   /* NOTE(review): "mrs" is also listed on the first line */
  23.165 +                     "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", 
  23.166 +                     "i'm", "during", "let", "toward", "among",
  23.167 +                     ""};   /* list terminator */
  23.168 +
  23.169 +
  23.170 +char *noperiod[] = { "every", "i'm", "during", "that's", "their", "your", "our", "my", "or",   /* presumably words after which a period is suspect -- TODO confirm against use */
  23.171 +                     "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether", 
  23.172 +                     "i'll", "whose", "who", "because", "when", "let", "till", "very",
  23.173 +                     "an", "among", "those", "into", "whom", "having", "thence",
  23.174 +                     ""};   /* list terminator */
  23.175 +
  23.176 +
  23.177 +char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü";  /* Carlo's old suggestion, updated .991. NOTE(review): the accented letters are raw bytes of this source file, so the set depends on the file's encoding -- presumably ISO-8859-1, confirm */
  23.178 +
  23.179 +struct {                 /* HTML character entities and their plain-text replacements */
  23.180 +    char *htmlent;       /* named form, e.g. "&amp;" */
  23.181 +    char *htmlnum;       /* numeric form, e.g. "&#38;" */
  23.182 +    char *textent;       /* plain-text replacement, e.g. "&" */
  23.183 +    } entities[] = { "&amp;",           "&#38;",        "&", 
  23.184 +                     "&lt;",            "&#60;",        "<",
  23.185 +                     "&gt;",            "&#62;",        ">",
  23.186 +                     "&deg;",           "&#176;",       " degrees",
  23.187 +                     "&pound;",         "&#163;",       "L",
  23.188 +                     "&quot;",          "&#34;",        "\"",   /* -- quotation mark = APL quote, */
  23.189 +                     "&OElig;",         "&#338;",       "OE",  /* -- latin capital ligature OE, */
  23.190 +                     "&oelig;",         "&#339;",       "oe",  /* -- latin small ligature oe, U+0153 ISOlat2 --> */
  23.191 +                     "&Scaron;",        "&#352;",       "S",  /* -- latin capital letter S with caron, */
  23.192 +                     "&scaron;",        "&#353;",       "s",  /* -- latin small letter s with caron, */
  23.193 +                     "&Yuml;",          "&#376;",       "Y",  /* -- latin capital letter Y with diaeresis, */
  23.194 +                     "&circ;",          "&#710;",       "",  /* -- modifier letter circumflex accent, */
  23.195 +                     "&tilde;",         "&#732;",       "~",  /* -- small tilde, U+02DC ISOdia --> */
  23.196 +                     "&ensp;",          "&#8194;",      " ", /* -- en space, U+2002 ISOpub --> */
  23.197 +                     "&emsp;",          "&#8195;",      " ", /* -- em space, U+2003 ISOpub --> */
  23.198 +                     "&thinsp;",        "&#8201;",      " ", /* -- thin space, U+2009 ISOpub --> */
  23.199 +                     "&ndash;",         "&#8211;",      "-", /* -- en dash, U+2013 ISOpub --> */
  23.200 +                     "&mdash;",         "&#8212;",      "--", /* -- em dash, U+2014 ISOpub --> */
  23.201 +                     "&lsquo;",         "&#8216;",      "'", /* -- left single quotation mark, */
  23.202 +                     "&rsquo;",         "&#8217;",      "'", /* -- right single quotation mark, */
  23.203 +                     "&sbquo;",         "&#8218;",      "'", /* -- single low-9 quotation mark, U+201A NEW --> */
  23.204 +                     "&ldquo;",         "&#8220;",      "\"", /* -- left double quotation mark, */
  23.205 +                     "&rdquo;",         "&#8221;",      "\"", /* -- right double quotation mark, */
  23.206 +                     "&bdquo;",         "&#8222;",      "\"", /* -- double low-9 quotation mark, U+201E NEW --> */
  23.207 +                     "&lsaquo;",        "&#8249;",      "\"", /* -- single left-pointing angle quotation mark, */
  23.208 +                     "&rsaquo;",        "&#8250;",      "\"", /* -- single right-pointing angle quotation mark, */
  23.209 +                     "&nbsp;",          "&#160;",       " ", /* -- no-break space = non-breaking space, */
  23.210 +                     "&iexcl;",         "&#161;",       "!", /* -- inverted exclamation mark, U+00A1 ISOnum --> */
  23.211 +                     "&cent;",          "&#162;",       "c", /* -- cent sign, U+00A2 ISOnum --> */
  23.212 +                     "&pound;",         "&#163;",       "L", /* -- pound sign, U+00A3 ISOnum --> (repeats the &pound; row near the top of the table) */
  23.213 +                     "&curren;",        "&#164;",       "$", /* -- currency sign, U+00A4 ISOnum --> */
  23.214 +                     "&yen;",           "&#165;",       "Y", /* -- yen sign = yuan sign, U+00A5 ISOnum --> */
  23.215 +                     "&sect;",          "&#167;",       "--", /* -- section sign, U+00A7 ISOnum --> */
  23.216 +                     "&uml;",           "&#168;",       " ", /* -- diaeresis = spacing diaeresis, */
  23.217 +                     "&copy;",          "&#169;",       "(C) ", /* -- copyright sign, U+00A9 ISOnum --> */
  23.218 +                     "&ordf;",          "&#170;",       " ", /* -- feminine ordinal indicator, U+00AA ISOnum --> */
  23.219 +                     "&laquo;",         "&#171;",       "\"", /* -- left-pointing double angle quotation mark */
  23.220 +                     "&shy;",           "&#173;",       "-", /* -- soft hyphen = discretionary hyphen, */
  23.221 +                     "&reg;",           "&#174;",       "(R) ", /* -- registered sign = registered trade mark sign, */
  23.222 +                     "&macr;",          "&#175;",       " ", /* -- macron = spacing macron = overline */
  23.223 +                     "&deg;",           "&#176;",       " degrees", /* -- degree sign, U+00B0 ISOnum --> (repeats the &deg; row near the top of the table) */
  23.224 +                     "&plusmn;",        "&#177;",       "+-", /* -- plus-minus sign = plus-or-minus sign, */
  23.225 +                     "&sup2;",          "&#178;",       "2", /* -- superscript two = superscript digit two */
  23.226 +                     "&sup3;",          "&#179;",       "3", /* -- superscript three = superscript digit three */
  23.227 +                     "&acute;",         "&#180;",       " ", /* -- acute accent = spacing acute, */
  23.228 +                     "&micro;",         "&#181;",       "m", /* -- micro sign, U+00B5 ISOnum --> */
  23.229 +                     "&para;",          "&#182;",       "--", /* -- pilcrow sign = paragraph sign, */
  23.230 +                     "&cedil;",         "&#184;",       " ", /* -- cedilla = spacing cedilla, U+00B8 ISOdia --> */
  23.231 +                     "&sup1;",          "&#185;",       "1", /* -- superscript one = superscript digit one, */
  23.232 +                     "&ordm;",          "&#186;",       " ", /* -- masculine ordinal indicator, */
  23.233 +                     "&raquo;",         "&#187;",       "\"", /* -- right-pointing double angle quotation mark */
  23.234 +                     "&frac14;",        "&#188;",       "1/4", /* -- vulgar fraction one quarter */
  23.235 +                     "&frac12;",        "&#189;",       "1/2", /* -- vulgar fraction one half */
  23.236 +                     "&frac34;",        "&#190;",       "3/4", /* -- vulgar fraction three quarters */
  23.237 +                     "&iquest;",        "&#191;",       "?", /* -- inverted question mark */
  23.238 +                     "&Agrave;",        "&#192;",       "A", /* -- latin capital letter A with grave */
  23.239 +                     "&Aacute;",        "&#193;",       "A", /* -- latin capital letter A with acute, */
  23.240 +                     "&Acirc;",         "&#194;",       "A", /* -- latin capital letter A with circumflex, */
  23.241 +                     "&Atilde;",        "&#195;",       "A", /* -- latin capital letter A with tilde, */
  23.242 +                     "&Auml;",          "&#196;",       "A", /* -- latin capital letter A with diaeresis, */
  23.243 +                     "&Aring;",         "&#197;",       "A", /* -- latin capital letter A with ring above */
  23.244 +                     "&AElig;",         "&#198;",       "AE", /* -- latin capital letter AE */
  23.245 +                     "&Ccedil;",        "&#199;",       "C", /* -- latin capital letter C with cedilla, */
  23.246 +                     "&Egrave;",        "&#200;",       "E", /* -- latin capital letter E with grave, */
  23.247 +                     "&Eacute;",        "&#201;",       "E", /* -- latin capital letter E with acute, */
  23.248 +                     "&Ecirc;",         "&#202;",       "E", /* -- latin capital letter E with circumflex, */
  23.249 +                     "&Euml;",          "&#203;",       "E", /* -- latin capital letter E with diaeresis, */
  23.250 +                     "&Igrave;",        "&#204;",       "I", /* -- latin capital letter I with grave, */
  23.251 +                     "&Iacute;",        "&#205;",       "I", /* -- latin capital letter I with acute, */
  23.252 +                     "&Icirc;",         "&#206;",       "I", /* -- latin capital letter I with circumflex, */
  23.253 +                     "&Iuml;",          "&#207;",       "I", /* -- latin capital letter I with diaeresis, */
  23.254 +                     "&ETH;",           "&#208;",       "E", /* -- latin capital letter ETH, U+00D0 ISOlat1 --> */
  23.255 +                     "&Ntilde;",        "&#209;",       "N", /* -- latin capital letter N with tilde, */
  23.256 +                     "&Ograve;",        "&#210;",       "O", /* -- latin capital letter O with grave, */
  23.257 +                     "&Oacute;",        "&#211;",       "O", /* -- latin capital letter O with acute, */
  23.258 +                     "&Ocirc;",         "&#212;",       "O", /* -- latin capital letter O with circumflex, */
  23.259 +                     "&Otilde;",        "&#213;",       "O", /* -- latin capital letter O with tilde, */
  23.260 +                     "&Ouml;",          "&#214;",       "O", /* -- latin capital letter O with diaeresis, */
  23.261 +                     "&times;",         "&#215;",       "*", /* -- multiplication sign, U+00D7 ISOnum --> */
  23.262 +                     "&Oslash;",        "&#216;",       "O", /* -- latin capital letter O with stroke */
  23.263 +                     "&Ugrave;",        "&#217;",       "U", /* -- latin capital letter U with grave, */
  23.264 +                     "&Uacute;",        "&#218;",       "U", /* -- latin capital letter U with acute, */
  23.265 +                     "&Ucirc;",         "&#219;",       "U", /* -- latin capital letter U with circumflex, */
  23.266 +                     "&Uuml;",          "&#220;",       "U", /* -- latin capital letter U with diaeresis, */
  23.267 +                     "&Yacute;",        "&#221;",       "Y", /* -- latin capital letter Y with acute, */
  23.268 +                     "&THORN;",         "&#222;",       "TH", /* -- latin capital letter THORN, */
  23.269 +                     "&szlig;",         "&#223;",       "sz", /* -- latin small letter sharp s = ess-zed, */
  23.270 +                     "&agrave;",        "&#224;",       "a", /* -- latin small letter a with grave */
  23.271 +                     "&aacute;",        "&#225;",       "a", /* -- latin small letter a with acute, */
  23.272 +                     "&acirc;",         "&#226;",       "a", /* -- latin small letter a with circumflex, */
  23.273 +                     "&atilde;",        "&#227;",       "a", /* -- latin small letter a with tilde, */
  23.274 +                     "&auml;",          "&#228;",       "a", /* -- latin small letter a with diaeresis, */
  23.275 +                     "&aring;",         "&#229;",       "a", /* -- latin small letter a with ring above */
  23.276 +                     "&aelig;",         "&#230;",       "ae", /* -- latin small letter ae */
  23.277 +                     "&ccedil;",        "&#231;",       "c", /* -- latin small letter c with cedilla, */
  23.278 +                     "&egrave;",        "&#232;",       "e", /* -- latin small letter e with grave, */
  23.279 +                     "&eacute;",        "&#233;",       "e", /* -- latin small letter e with acute, */
  23.280 +                     "&ecirc;",         "&#234;",       "e", /* -- latin small letter e with circumflex, */
  23.281 +                     "&euml;",          "&#235;",       "e", /* -- latin small letter e with diaeresis, */
  23.282 +                     "&igrave;",        "&#236;",       "i", /* -- latin small letter i with grave, */
  23.283 +                     "&iacute;",        "&#237;",       "i", /* -- latin small letter i with acute, */
  23.284 +                     "&icirc;",         "&#238;",       "i", /* -- latin small letter i with circumflex, */
  23.285 +                     "&iuml;",          "&#239;",       "i", /* -- latin small letter i with diaeresis, */
  23.286 +                     "&eth;",           "&#240;",       "eth", /* -- latin small letter eth, U+00F0 ISOlat1 --> */
  23.287 +                     "&ntilde;",        "&#241;",       "n", /* -- latin small letter n with tilde, */
  23.288 +                     "&ograve;",        "&#242;",       "o", /* -- latin small letter o with grave, */
  23.289 +                     "&oacute;",        "&#243;",       "o", /* -- latin small letter o with acute, */
  23.290 +                     "&ocirc;",         "&#244;",       "o", /* -- latin small letter o with circumflex, */
  23.291 +                     "&otilde;",        "&#245;",       "o", /* -- latin small letter o with tilde, */
  23.292 +                     "&ouml;",          "&#246;",       "o", /* -- latin small letter o with diaeresis, */
  23.293 +                     "&divide;",        "&#247;",       "/", /* -- division sign, U+00F7 ISOnum --> */
  23.294 +                     "&oslash;",        "&#248;",       "o", /* -- latin small letter o with stroke, */
  23.295 +                     "&ugrave;",        "&#249;",       "u", /* -- latin small letter u with grave, */
  23.296 +                     "&uacute;",        "&#250;",       "u", /* -- latin small letter u with acute, */
  23.297 +                     "&ucirc;",         "&#251;",       "u", /* -- latin small letter u with circumflex, */
  23.298 +                     "&uuml;",          "&#252;",       "u", /* -- latin small letter u with diaeresis, */
  23.299 +                     "&yacute;",        "&#253;",       "y", /* -- latin small letter y with acute, */
  23.300 +                     "&thorn;",         "&#254;",       "th", /* -- latin small letter thorn, */
  23.301 +                     "&yuml;",          "&#255;",       "y", /* -- latin small letter y with diaeresis, */
  23.302 +                      "", "" };   /* terminator row: empty htmlent and htmlnum; textent is not given, so it is a null pointer */
  23.303 +                    
  23.304 +/* ---- list of special characters (numeric values are ASCII codes) ---- */
  23.305 +#define CHAR_SPACE        32    /* ' '  */
  23.306 +#define CHAR_TAB           9    /* '\t' */
  23.307 +#define CHAR_LF           10    /* '\n' */
  23.308 +#define CHAR_CR           13    /* '\r' */
  23.309 +#define CHAR_DQUOTE       34    /* '"'  */
  23.310 +#define CHAR_SQUOTE       39    /* '\'' */
  23.311 +#define CHAR_OPEN_SQUOTE  96    /* '`'  */
  23.312 +#define CHAR_TILDE       126    /* '~'  */
  23.313 +#define CHAR_ASTERISK     42    /* '*'  */
  23.314 +#define CHAR_FORESLASH    47    /* '/'  */
  23.315 +#define CHAR_CARAT        94    /* '^' (caret) */
  23.316 +
  23.317 +#define CHAR_UNDERSCORE    '_'
  23.318 +#define CHAR_OPEN_CBRACK   '{'  /* curly bracket  */
  23.319 +#define CHAR_CLOSE_CBRACK  '}'
  23.320 +#define CHAR_OPEN_RBRACK   '('  /* round bracket  */
  23.321 +#define CHAR_CLOSE_RBRACK  ')'
  23.322 +#define CHAR_OPEN_SBRACK   '['  /* square bracket */
  23.323 +#define CHAR_CLOSE_SBRACK  ']'
  23.324 +
  23.325 +
  23.326 +
  23.327 +
  23.328 +
  23.329 +/* ---- longest and shortest normal PG line lengths, in characters ----*/
  23.330 +#define LONGEST_PG_LINE   75    /* longest normal line */
  23.331 +#define WAY_TOO_LONG      80    /* presumably the length treated as a definite long-line error -- TODO confirm against use */
  23.332 +#define SHORTEST_PG_LINE  55    /* shortest normal line */
  23.333 +
  23.334 +#define SWITCHES "ESTPXLOYHWVMUD" /* switches:-                            */
  23.335 +                                  /*     D - ignore DP-specific markup     */
  23.336 +                                  /*     E - echo queried line; echoing is */
  23.337 +                                  /*         on by default, -E turns it off*/
  23.338 +                                  /*     S - check single quotes           */
  23.339 +                                  /*     T - check common typos (main()    */
  23.340 +                                  /*         inverts it while paranoid on) */
  23.341 +                                  /*     X - paranoid mode is on by        */
  23.342 +                                  /*         default; -X turns it off      */
  23.343 +                                  /*     L - line end checking defaults on */
  23.344 +                                  /*         -L turns it off               */
  23.345 +                                  /*     O - overview. Just shows counts.  */
  23.346 +                                  /*     Y - puts errors to stdout         */
  23.347 +                                  /*         instead of stderr             */
  23.348 +                                  /*     H - Echoes header fields          */
  23.349 +                                  /*     M - Ignore markup in < >          */
  23.350 +                                  /*     U - Use file of User-defined Typos*/
  23.351 +                                  /*     W - Defaults for use on Web upload*/
  23.352 +                                  /*     V - Verbose - list EVERYTHING!    */
  23.353 +#define SWITNO 14                 /* max number of switch parms            */
  23.354 +                                  /*        - used for defining array-size */
  23.355 +#define MINARGS   1               /* minimum no of args excl switches      */
  23.356 +#define MAXARGS   1               /* maximum no of args excl switches      */
  23.357 +
  23.358 +int pswit[SWITNO];                /* program switches set by SWITCHES      */
  23.359 +
  23.360 +#define ECHO_SWITCH      0        /* 'E'; each index is the letter's position in SWITCHES */
  23.361 +#define SQUOTE_SWITCH    1        /* 'S' */
  23.362 +#define TYPO_SWITCH      2        /* 'T' */
  23.363 +#define QPARA_SWITCH     3        /* 'P' */
  23.364 +#define PARANOID_SWITCH  4        /* 'X' */
  23.365 +#define LINE_END_SWITCH  5        /* 'L' */
  23.366 +#define OVERVIEW_SWITCH  6        /* 'O' */
  23.367 +#define STDOUT_SWITCH    7        /* 'Y' */
  23.368 +#define HEADER_SWITCH    8        /* 'H' */
  23.369 +#define WEB_SWITCH       9        /* 'W' */
  23.370 +#define VERBOSE_SWITCH   10       /* 'V' */
  23.371 +#define MARKUP_SWITCH    11       /* 'M' */
  23.372 +#define USERTYPO_SWITCH  12       /* 'U' */
  23.373 +#define DP_SWITCH        13       /* 'D' */
  23.374 +
  23.375 +
  23.376 +
  23.377 +long cnt_dquot;       /* for overview mode, count of doublequote queries */
  23.378 +long cnt_squot;       /* for overview mode, count of singlequote queries */
  23.379 +long cnt_brack;       /* for overview mode, count of brackets queries */
  23.380 +long cnt_bin;         /* for overview mode, count of non-ASCII queries */
  23.381 +long cnt_odd;         /* for overview mode, count of odd character queries */
  23.382 +long cnt_long;        /* for overview mode, count of long line errors */
  23.383 +long cnt_short;       /* for overview mode, count of short line queries */
  23.384 +long cnt_punct;       /* for overview mode, count of punctuation and spacing queries */
  23.385 +long cnt_dash;        /* for overview mode, count of dash-related queries */
  23.386 +long cnt_word;        /* for overview mode, count of word queries */
  23.387 +long cnt_html;        /* for overview mode, count of html queries */
  23.388 +long cnt_lineend;     /* for overview mode, count of line-end queries */
  23.389 +long cnt_spacend;     /* count of lines with space at end  V .21 */
  23.390 +long linecnt;         /* count of total lines in the file; not among the counters main() zeroes before procfile() */
  23.391 +long checked_linecnt; /* count of lines actually gutchecked V .26; likewise not zeroed by main() before procfile() */
  23.392 +
  23.393 +void proghelp(void);          /* called by main() when the number of non-switch arguments is wrong */
  23.394 +void procfile(char *);        /* called by main() with the remaining command-line argument */
  23.395 +
  23.396 +#define LOW_THRESHOLD    0
  23.397 +#define HIGH_THRESHOLD   1
  23.398 +
  23.399 +#define START 0
  23.400 +#define END 1
  23.401 +#define PREV 0
  23.402 +#define NEXT 1
  23.403 +#define FIRST_OF_PAIR 0
  23.404 +#define SECOND_OF_PAIR 1
  23.405 +
  23.406 +#define MAX_WORDPAIR 1000
  23.407 +
  23.408 +char running_from[MAX_PATH];  /* main() copies argv[0] here and strips the trailing file name, leaving the directory part */
  23.409 +
  23.410 +int mixdigit(char *);
  23.411 +char *getaword(char *, char *);
  23.412 +int matchword(char *, char *);
  23.413 +char *flgets(char *, int, FILE *, long);  /* main() calls it as (buffer, size, stream, lines read so far) and loops while the result is non-null */
  23.414 +void lowerit(char *);
  23.415 +int gcisalpha(unsigned char);
  23.416 +int gcisdigit(unsigned char);
  23.417 +int gcisletter(unsigned char);
  23.418 +char *gcstrchr(char *s, char c);
  23.419 +void postprocess_for_HTML(char *);
  23.420 +char *linehasmarkup(char *);
  23.421 +char *losemarkup(char *);
  23.422 +int tagcomp(char *, char *);
  23.423 +char *loseentities(char *);
  23.424 +int isroman(char *);
  23.425 +int usertypo_count;           /* number of entries main() has stored in usertypo[] */
  23.426 +void postprocess_for_DP(char *);
  23.427 +
  23.428 +char wrk[LINEBUFSIZE];        /* presumably a scratch line buffer -- TODO confirm against use */
  23.429 +
  23.430 +/* This is disgustingly lazy, predefining max words & lengths,   */
  23.431 +/* but now I'm out of 16-bit restrictions, what's a couple of K? */
  23.432 +#define MAX_QWORD           50    /* rows in qword[], qperiod[] and dupcnt[] */
  23.433 +#define MAX_QWORD_LENGTH    40    /* bytes per row of qword[] and qperiod[] */
  23.434 +char qword[MAX_QWORD][MAX_QWORD_LENGTH];
  23.435 +char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
  23.436 +signed int dupcnt[MAX_QWORD];
  23.437 +
  23.438 +
  23.439 +
  23.440 +
  23.441 +int main(int argc, char **argv)
  23.442 +{
  23.443 +    char *argsw, *s;
  23.444 +    int i, switno, invarg;
  23.445 +    char usertypo_file[MAX_PATH];
  23.446 +    FILE *usertypofile;
  23.447 +
  23.448 +
  23.449 +    if (strlen(argv[0]) < sizeof(running_from))
  23.450 +        strcpy(running_from, argv[0]);  /* save the path to the executable gutcheck */
  23.451 +
  23.452 +    /* find out what directory we're running from */
  23.453 +    for (s = running_from + strlen(running_from); *s != '/' && *s != '\\' && s >= running_from; s--)
  23.454 +        *s = 0;
  23.455 +
  23.456 +
  23.457 +    switno = strlen(SWITCHES);
  23.458 +    for (i = switno ; --i >0 ; )
  23.459 +        pswit[i] = 0;           /* initialise switches */
  23.460 +
  23.461 +    /* Standard loop to extract switches.                   */
  23.462 +    /* When we come out of this loop, the arguments will be */
  23.463 +    /* in argv[0] upwards and the switches used will be     */
  23.464 +    /* represented by their equivalent elements in pswit[]  */
  23.465 +    while ( --argc > 0 && **++argv == '-')
  23.466 +        for (argsw = argv[0]+1; *argsw !='\0'; argsw++)
  23.467 +            for (i = switno, invarg = 1; (--i >= 0) && invarg == 1 ; )
  23.468 +                if ((toupper(*argsw)) == SWITCHES[i] ) {
  23.469 +                    invarg = 0;
  23.470 +                    pswit[i] = 1;
  23.471 +                    }
  23.472 +
  23.473 +    pswit[PARANOID_SWITCH] ^= 1;         /* Paranoid checking is turned OFF, not on, by its switch */
  23.474 +
  23.475 +    if (pswit[PARANOID_SWITCH]) {                         /* if running in paranoid mode */
  23.476 +        pswit[TYPO_SWITCH] = pswit[TYPO_SWITCH] ^ 1;      /* force typo checks as well   */
  23.477 +        }                                                 /* v.20 removed s and p switches from paranoid mode */
  23.478 +
  23.479 +    pswit[LINE_END_SWITCH] ^= 1;         /* Line-end checking is turned OFF, not on, by its switch */
  23.480 +    pswit[ECHO_SWITCH] ^= 1;             /* V.21 Echoing is turned OFF, not on, by its switch      */
  23.481 +
  23.482 +    if (pswit[OVERVIEW_SWITCH])       /* just print summary; don't echo */
  23.483 +        pswit[ECHO_SWITCH] = 0;
  23.484 +
  23.485 +    /* Web uploads - for the moment, this is really just a placeholder     */
  23.486 +    /* until we decide what processing we really want to do on web uploads */
  23.487 +    if (pswit[WEB_SWITCH]) {          /* specific override for web uploads */
  23.488 +        pswit[ECHO_SWITCH] =     1;
  23.489 +        pswit[SQUOTE_SWITCH] =   0;
  23.490 +        pswit[TYPO_SWITCH] =     1;
  23.491 +        pswit[QPARA_SWITCH] =    0;
  23.492 +        pswit[PARANOID_SWITCH] = 1;
  23.493 +        pswit[LINE_END_SWITCH] = 0;
  23.494 +        pswit[OVERVIEW_SWITCH] = 0;
  23.495 +        pswit[STDOUT_SWITCH] =   0;
  23.496 +        pswit[HEADER_SWITCH] =   1;
  23.497 +        pswit[VERBOSE_SWITCH] =  0;
  23.498 +        pswit[MARKUP_SWITCH] =   0;
  23.499 +        pswit[USERTYPO_SWITCH] = 0;
  23.500 +        pswit[DP_SWITCH] = 0;
  23.501 +        }
  23.502 +
  23.503 +
  23.504 +    if (argc < MINARGS || argc > MAXARGS) {  /* check number of args */
  23.505 +        proghelp();
  23.506 +        return(1);            /* exit */
  23.507 +        }
  23.508 +
  23.509 +
  23.510 +    /* read in the user-defined stealth scanno list */
  23.511 +
  23.512 +    if (pswit[USERTYPO_SWITCH]) {                    /* ... we were told we had one! */
  23.513 +        if ((usertypofile = fopen(USERTYPO_FILE, "rb")) == NULL) {   /* not in cwd. try gutcheck directory. */
  23.514 +            strcpy(usertypo_file, running_from);
  23.515 +            strcat(usertypo_file, USERTYPO_FILE);
  23.516 +            if ((usertypofile = fopen(usertypo_file, "rb")) == NULL) {  /* we ain't got no user typo file! */
  23.517 +                printf("   --> I couldn't find gutcheck.typ -- proceeding without user typos.\n");
  23.518 +                }
  23.519 +            }
  23.520 +
  23.521 +        usertypo_count = 0;
  23.522 +        if (usertypofile) {  /* we managed to open a User Typo File! */
  23.523 +            if (pswit[USERTYPO_SWITCH]) {
  23.524 +                while (flgets(aline, LINEBUFSIZE-1, usertypofile, (long)usertypo_count)) {
  23.525 +                    if (strlen(aline) > 1) {
  23.526 +                        if ((int)*aline > 33) {
  23.527 +                            s = malloc(strlen(aline)+1);
  23.528 +                            if (!s) {
  23.529 +                                fprintf(stderr, "gutcheck: cannot get enough memory for user typo file!!\n");
  23.530 +                                exit(1);
  23.531 +                                }
  23.532 +                            strcpy(s, aline);
  23.533 +                            usertypo[usertypo_count] = s;
  23.534 +                            usertypo_count++;
  23.535 +                            if (usertypo_count >= MAX_USER_TYPOS) {
  23.536 +                                printf("   --> Only %d user-defined typos allowed: ignoring the rest\n");
  23.537 +                                break;
  23.538 +                                }
  23.539 +                            }
  23.540 +                        }
  23.541 +                    }
  23.542 +                }
  23.543 +            fclose(usertypofile);
  23.544 +            }
  23.545 +        }
  23.546 +
  23.547 +
  23.548 +
  23.549 +
  23.550 +    fprintf(stderr, "gutcheck: Check and report on an e-text\n");
  23.551 +
  23.552 +    cnt_dquot = cnt_squot = cnt_brack = cnt_bin = cnt_odd = cnt_long =
  23.553 +    cnt_short = cnt_punct = cnt_dash = cnt_word = cnt_html = cnt_lineend =
  23.554 +    cnt_spacend = 0;
  23.555 +
  23.556 +    procfile(argv[0]);
  23.557 +
  23.558 +    if (pswit[OVERVIEW_SWITCH]) {
  23.559 +                         printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
  23.560 +                            checked_linecnt, linecnt, linecnt - checked_linecnt);
  23.561 +                         printf("    --------------- Queries found --------------\n");
  23.562 +        if (cnt_long)    printf("    Long lines:                             %5ld\n",cnt_long);
  23.563 +        if (cnt_short)   printf("    Short lines:                            %5ld\n",cnt_short);
  23.564 +        if (cnt_lineend) printf("    Line-end problems:                      %5ld\n",cnt_lineend);
  23.565 +        if (cnt_word)    printf("    Common typos:                           %5ld\n",cnt_word);
  23.566 +        if (cnt_dquot)   printf("    Unmatched quotes:                       %5ld\n",cnt_dquot);
  23.567 +        if (cnt_squot)   printf("    Unmatched SingleQuotes:                 %5ld\n",cnt_squot);
  23.568 +        if (cnt_brack)   printf("    Unmatched brackets:                     %5ld\n",cnt_brack);
  23.569 +        if (cnt_bin)     printf("    Non-ASCII characters:                   %5ld\n",cnt_bin);
  23.570 +        if (cnt_odd)     printf("    Proofing characters:                    %5ld\n",cnt_odd);
  23.571 +        if (cnt_punct)   printf("    Punctuation & spacing queries:          %5ld\n",cnt_punct);
  23.572 +        if (cnt_dash)    printf("    Non-standard dashes:                    %5ld\n",cnt_dash);
  23.573 +        if (cnt_html)    printf("    Possible HTML tags:                     %5ld\n",cnt_html);
  23.574 +        printf("\n");
  23.575 +        printf("    TOTAL QUERIES                           %5ld\n",
  23.576 +            cnt_dquot + cnt_squot + cnt_brack + cnt_bin + cnt_odd + cnt_long +
  23.577 +            cnt_short + cnt_punct + cnt_dash + cnt_word + cnt_html + cnt_lineend);
  23.578 +        }
  23.579 +
  23.580 +    return(0);
  23.581 +}
  23.582 +
  23.583 +
  23.584 +
  23.585 +/* procfile - process one file */
  23.586 +
  23.587 +void procfile(char *filename)
  23.588 +{
  23.589 +
  23.590 +    char *s, *t, *s1, laststart, *wordstart;
  23.591 +    char inword[MAXWORDLEN], testword[MAXWORDLEN];
  23.592 +    char parastart[81];     /* first line of current para */
  23.593 +    FILE *infile;
  23.594 +    long quot, squot, firstline, alphalen, totlen, binlen,
  23.595 +         shortline, longline, verylongline, spacedash, emdash,
  23.596 +         space_emdash, non_PG_space_emdash, PG_space_emdash,
  23.597 +         footerline, dotcomma, start_para_line, astline, fslashline,
  23.598 +         standalone_digit, hyphens, htmcount, endquote_count;
  23.599 +    long spline, nspline;
  23.600 +    signed int i, j, llen, isemptyline, isacro, isellipsis, istypo, alower,
  23.601 +         eNon_A, eTab, eTilde, eAst, eFSlash, eCarat;
  23.602 +    signed int warn_short, warn_long, warn_bin, warn_dash, warn_dotcomma,
  23.603 +         warn_ast, warn_fslash, warn_digit, warn_hyphen, warn_endquote;
  23.604 +    unsigned int lastlen, lastblen;
  23.605 +    signed int s_brack, c_brack, r_brack, c_unders;
  23.606 +    signed int open_single_quote, close_single_quote, guessquote, dquotepar, squotepar;
  23.607 +    signed int isnewpara, vowel, consonant;
  23.608 +    char dquote_err[80], squote_err[80], rbrack_err[80], sbrack_err[80], cbrack_err[80],
  23.609 +         unders_err[80];
  23.610 +    signed int qword_index, qperiod_index, isdup;
  23.611 +    signed int enddash;
  23.612 +    signed int Dutchcount, isDutch, Frenchcount, isFrench;
  23.613 +
  23.614 +
  23.615 +    
  23.616 +
  23.617 +
  23.618 +    laststart = CHAR_SPACE;
  23.619 +    lastlen = lastblen = 0;
  23.620 +    *dquote_err = *squote_err = *rbrack_err = *cbrack_err = *sbrack_err =
  23.621 +        *unders_err = *prevline = 0;
  23.622 +    linecnt = firstline = alphalen = totlen = binlen =
  23.623 +        shortline = longline = spacedash = emdash = checked_linecnt =
  23.624 +        space_emdash = non_PG_space_emdash = PG_space_emdash =
  23.625 +        footerline = dotcomma = start_para_line = astline = fslashline = 
  23.626 +        standalone_digit = hyphens = htmcount = endquote_count = 0;
  23.627 +    quot = squot = s_brack = c_brack = r_brack = c_unders = 0;
  23.628 +    i = llen = isemptyline = isacro = isellipsis = istypo = 0;
  23.629 +    warn_short = warn_long = warn_bin = warn_dash = warn_dotcomma = 
  23.630 +        warn_ast = warn_fslash = warn_digit = warn_endquote = 0;
  23.631 +    isnewpara = vowel = consonant = enddash = 0;
  23.632 +    spline = nspline = 0;
  23.633 +    qword_index = qperiod_index = isdup = 0;
  23.634 +    *inword = *testword = 0;
  23.635 +    open_single_quote = close_single_quote = guessquote = dquotepar = squotepar = 0;
  23.636 +    Dutchcount = isDutch = Frenchcount = isFrench = 0;
  23.637 +
  23.638 +
  23.639 +    for (j = 0; j < MAX_QWORD; j++) {
  23.640 +        dupcnt[j] = 0;
  23.641 +        for (i = 0; i < MAX_QWORD_LENGTH; i++)
  23.642 +            qword[i][j] = 0;
  23.643 +            qperiod[i][j] = 0;
  23.644 +            }
  23.645 +
  23.646 +
  23.647 +    if ((infile = fopen(filename, "rb")) == NULL) {
  23.648 +        if (pswit[STDOUT_SWITCH])
  23.649 +            fprintf(stdout, "gutcheck: cannot open %s\n", filename);
  23.650 +        else
  23.651 +            fprintf(stderr, "gutcheck: cannot open %s\n", filename);
  23.652 +        exit(1);
  23.653 +        }
  23.654 +
  23.655 +    fprintf(stdout, "\n\nFile: %s\n\n", filename);
  23.656 +    firstline = shortline = longline = verylongline = 0;
  23.657 +
  23.658 +
  23.659 +    /*****************************************************/
  23.660 +    /*                                                   */
  23.661 +    /*  Run a first pass - verify that it's a valid PG   */
  23.662 +    /*  file, decide whether to report some things that  */
  23.663 +    /*  occur many times in the text like long or short  */
  23.664 +    /*  lines, non-standard dashes, and other good stuff */
  23.665 +    /*  I'll doubtless think of later.                   */
  23.666 +    /*                                                   */
  23.667 +    /*****************************************************/
  23.668 +
  23.669 +    /*****************************************************/
  23.670 +    /* V.24  Sigh. Yet Another Header Change             */
  23.671 +    /*****************************************************/
  23.672 +
  23.673 +    while (fgets(aline, LINEBUFSIZE-1, infile)) {
  23.674 +        while (aline[strlen(aline)-1] == 10 || aline[strlen(aline)-1] == 13 ) aline[strlen(aline)-1] = 0;
  23.675 +        linecnt++;
  23.676 +        if (strstr(aline, "*END") && strstr(aline, "SMALL PRINT") && (strstr(aline, "PUBLIC DOMAIN") || strstr(aline, "COPYRIGHT"))) {
  23.677 +            if (spline)
  23.678 +                printf("   --> Duplicate header?\n");
  23.679 +            spline = linecnt + 1;   /* first line of non-header text, that is */
  23.680 +            }
  23.681 +        if (!strncmp(aline, "*** START", 9) && strstr(aline, "PROJECT GUTENBERG")) {
  23.682 +            if (nspline)
  23.683 +                printf("   --> Duplicate header?\n");
  23.684 +            nspline = linecnt + 1;   /* first line of non-header text, that is */
  23.685 +            }
  23.686 +        if (spline || nspline) {
  23.687 +            lowerit(aline);
  23.688 +            if (strstr(aline, "end") && strstr(aline, "project gutenberg")) {
  23.689 +                if (strstr(aline, "end") < strstr(aline, "project gutenberg")) {
  23.690 +                    if (footerline) {
  23.691 +                        if (!nspline) /* it's an old-form header - we can detect duplicates */
  23.692 +                            printf("   --> Duplicate footer?\n");
  23.693 +                        else 
  23.694 +                            ;
  23.695 +                        }
  23.696 +                    else {
  23.697 +                        footerline = linecnt;
  23.698 +                        }
  23.699 +                    }
  23.700 +                }
  23.701 +            }
  23.702 +        if (spline) firstline = spline;
  23.703 +        if (nspline) firstline = nspline;  /* override with new */
  23.704 +
  23.705 +        if (footerline) continue;    /* 0.99+ don't count the boilerplate in the footer */
  23.706 +
  23.707 +        llen = strlen(aline);
  23.708 +        totlen += llen;
  23.709 +        for (i = 0; i < llen; i++) {
  23.710 +            if ((unsigned char)aline[i] > 127) binlen++;
  23.711 +            if (gcisalpha(aline[i])) alphalen++;
  23.712 +            if (i > 0)
  23.713 +                if (aline[i] == CHAR_DQUOTE && isalpha(aline[i-1]))
  23.714 +                    endquote_count++;
  23.715 +            }
  23.716 +        if (strlen(aline) > 2
  23.717 +            && lastlen > 2 && lastlen < SHORTEST_PG_LINE
  23.718 +            && lastblen > 2 && lastblen > SHORTEST_PG_LINE
  23.719 +            && laststart != CHAR_SPACE)
  23.720 +                shortline++;
  23.721 +
  23.722 +        if (*aline) /* fixed line below for 0.96 */
  23.723 +            if ((unsigned char)aline[strlen(aline)-1] <= CHAR_SPACE) cnt_spacend++;
  23.724 +
  23.725 +        if (strstr(aline, ".,")) dotcomma++;
  23.726 +        /* 0.98 only count ast lines for ignoring purposes where there is */
  23.727 +        /* locase text on the line */
  23.728 +        if (strstr(aline, "*")) {
  23.729 +            for (s = aline; *s; s++)
  23.730 +                if (*s >='a' && *s <= 'z')
  23.731 +                    break;
  23.732 +             if (*s) astline++;
  23.733 +             }
  23.734 +        if (strstr(aline, "/"))
  23.735 +            fslashline++;
  23.736 +        for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
  23.737 +        if (aline[i] == '-' && aline[i-1] != '-') hyphens++;
  23.738 +
  23.739 +        if (llen > LONGEST_PG_LINE) longline++;
  23.740 +        if (llen > WAY_TOO_LONG) verylongline++;
  23.741 +
  23.742 +        if (strstr(aline, "<") && strstr(aline, ">")) {
  23.743 +            i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
  23.744 +            if (i > 0) 
  23.745 +                htmcount++;
  23.746 +            if (strstr(aline, "<i>")) htmcount +=4; /* bonus marks! */
  23.747 +            }
  23.748 +
  23.749 +        /* Check for spaced em-dashes */
  23.750 +        if (strstr(aline,"--")) {
  23.751 +            emdash++;
  23.752 +            if (*(strstr(aline, "--")-1) == CHAR_SPACE ||
  23.753 +               (*(strstr(aline, "--")+2) == CHAR_SPACE))
  23.754 +                    space_emdash++;
  23.755 +            if (*(strstr(aline, "--")-1) == CHAR_SPACE &&
  23.756 +               (*(strstr(aline, "--")+2) == CHAR_SPACE))
  23.757 +                    non_PG_space_emdash++;             /* count of em-dashes with spaces both sides */
  23.758 +            if (*(strstr(aline, "--")-1) != CHAR_SPACE &&
  23.759 +               (*(strstr(aline, "--")+2) != CHAR_SPACE))
  23.760 +                    PG_space_emdash++;                 /* count of PG-type em-dashes with no spaces */
  23.761 +            }
  23.762 +
  23.763 +        for (s = aline; *s;) {
  23.764 +            s = getaword(s, inword);
  23.765 +            if (!strcmp(inword, "hij") || !strcmp(inword, "niet")) 
  23.766 +                Dutchcount++;
  23.767 +            if (!strcmp(inword, "dans") || !strcmp(inword, "avec")) 
  23.768 +                Frenchcount++;
  23.769 +            if (!strcmp(inword, "0") || !strcmp(inword, "1")) 
  23.770 +                standalone_digit++;
  23.771 +            }
  23.772 +
  23.773 +        /* Check for spaced dashes */
  23.774 +        if (strstr(aline," -"))
  23.775 +            if (*(strstr(aline, " -")+2) != '-')
  23.776 +                    spacedash++;
  23.777 +        lastblen = lastlen;
  23.778 +        lastlen = strlen(aline);
  23.779 +        laststart = aline[0];
  23.780 +
  23.781 +        }
  23.782 +    fclose(infile);
  23.783 +
  23.784 +
  23.785 +    /* now, based on this quick view, make some snap decisions */
  23.786 +    if (cnt_spacend > 0) {
  23.787 +        printf("   --> %ld lines in this file have white space at end\n", cnt_spacend);
  23.788 +        }
  23.789 +
  23.790 +    warn_dotcomma = 1;
  23.791 +    if (dotcomma > 5) {
  23.792 +        warn_dotcomma = 0;
  23.793 +        printf("   --> %ld lines in this file contain '.,'. Not reporting them.\n", dotcomma);
  23.794 +        }
  23.795 +
  23.796 +    /* if more than 50 lines, or one-tenth, are short, don't bother reporting them */
  23.797 +    warn_short = 1;
  23.798 +    if (shortline > 50 || shortline * 10 > linecnt) {
  23.799 +        warn_short = 0;
  23.800 +        printf("   --> %ld lines in this file are short. Not reporting short lines.\n", shortline);
  23.801 +        }
  23.802 +
  23.803 +    /* if more than 50 lines, or one-tenth, are long, don't bother reporting them */
  23.804 +    warn_long = 1;
  23.805 +    if (longline > 50 || longline * 10 > linecnt) {
  23.806 +        warn_long = 0;
  23.807 +        printf("   --> %ld lines in this file are long. Not reporting long lines.\n", longline);
  23.808 +        }
  23.809 +
  23.810 +    /* if more than 10 lines contain asterisks, don't bother reporting them V.0.97 */
  23.811 +    warn_ast = 1;
  23.812 +    if (astline > 10 ) {
  23.813 +        warn_ast = 0;
  23.814 +        printf("   --> %ld lines in this file contain asterisks. Not reporting them.\n", astline);
  23.815 +        }
  23.816 +
  23.817 +    /* if more than 10 lines contain forward slashes, don't bother reporting them V.0.99 */
  23.818 +    warn_fslash = 1;
  23.819 +    if (fslashline > 10 ) {
  23.820 +        warn_fslash = 0;
  23.821 +        printf("   --> %ld lines in this file contain forward slashes. Not reporting them.\n", fslashline);
  23.822 +        }
  23.823 +
  23.824 +    /* if more than 20 lines contain unpunctuated endquotes, don't bother reporting them V.0.99 */
  23.825 +    warn_endquote = 1;
  23.826 +    if (endquote_count > 20 ) {
  23.827 +        warn_endquote = 0;
  23.828 +        printf("   --> %ld lines in this file contain unpunctuated endquotes. Not reporting them.\n", endquote_count);
  23.829 +        }
  23.830 +
  23.831 +    /* if more than 10 lines contain standalone digits, don't bother reporting them V.0.97 */
  23.832 +    warn_digit = 1;
  23.833 +    if (standalone_digit > 10 ) {
  23.834 +        warn_digit = 0;
  23.835 +        printf("   --> %ld lines in this file contain standalone 0s and 1s. Not reporting them.\n", standalone_digit);
  23.836 +        }
  23.837 +
  23.838 +    /* if more than 20 lines contain hyphens at end, don't bother reporting them V.0.98 */
  23.839 +    warn_hyphen = 1;
  23.840 +    if (hyphens > 20 ) {
  23.841 +        warn_hyphen = 0;
  23.842 +        printf("   --> %ld lines in this file have hyphens at end. Not reporting them.\n", hyphens);
  23.843 +        }
  23.844 +
  23.845 +    if (htmcount > 20 && !pswit[MARKUP_SWITCH]) {
  23.846 +        printf("   --> Looks like this is HTML. Switching HTML mode ON.\n");
  23.847 +        pswit[MARKUP_SWITCH] = 1;
  23.848 +        }
  23.849 +        
  23.850 +    if (verylongline > 0) {
  23.851 +        printf("   --> %ld lines in this file are VERY long!\n", verylongline);
  23.852 +        }
  23.853 +
  23.854 +    /* If there are more non-PG spaced dashes than PG em-dashes,    */
  23.855 +    /* assume it's deliberate                                       */
  23.856 +    /* Current PG guidelines say don't use them, but older texts do,*/
  23.857 +    /* and some people insist on them whatever the guidelines say.  */
  23.858 +    /* V.20 removed requirement that PG_space_emdash be greater than*/
  23.859 +    /* ten before turning off warnings about spaced dashes.         */
  23.860 +    warn_dash = 1;
  23.861 +    if (spacedash + non_PG_space_emdash > PG_space_emdash) {
  23.862 +        warn_dash = 0;
  23.863 +        printf("   --> There are %ld spaced dashes and em-dashes. Not reporting them.\n", spacedash + non_PG_space_emdash);
  23.864 +        }
  23.865 +
  23.866 +    /* if more than a quarter of characters are hi-bit, bug out */
  23.867 +    warn_bin = 1;
  23.868 +    if (binlen * 4 > totlen) {
  23.869 +        printf("   --> This file does not appear to be ASCII. Terminating. Best of luck with it!\n");
  23.870 +        exit(1);
  23.871 +        }
  23.872 +    if (alphalen * 4 < totlen) {
  23.873 +        printf("   --> This file does not appear to be text. Terminating. Best of luck with it!\n");
  23.874 +        exit(1);
  23.875 +        }
  23.876 +    if ((binlen * 100 > totlen) || (binlen > 100)) {
  23.877 +        printf("   --> There are a lot of foreign letters here. Not reporting them.\n");
  23.878 +        warn_bin = 0;
  23.879 +        }
  23.880 +
  23.881 +    /* isDutch and isFrench added .991 Feb 06 for Frank, Jeroen, Renald */
  23.882 +    isDutch = 0;
  23.883 +    if (Dutchcount > 50) {
  23.884 +        isDutch = 1;
  23.885 +        printf("   --> This looks like Dutch - switching off dashes and warnings for 's Middags case.\n");
  23.886 +        }
  23.887 +
  23.888 +    isFrench = 0;
  23.889 +    if (Frenchcount > 50) {
  23.890 +        isFrench = 1;
  23.891 +        printf("   --> This looks like French - switching off some doublepunct.\n");
  23.892 +        }
  23.893 +
  23.894 +    if (firstline && footerline)
  23.895 +        printf("    The PG header and footer appear to be already on.\n");
  23.896 +    else {
  23.897 +        if (firstline)
  23.898 +            printf("    The PG header is on - no footer.\n");
  23.899 +        if (footerline)
  23.900 +            printf("    The PG footer is on - no header.\n");
  23.901 +        }
  23.902 +    printf("\n");
  23.903 +
  23.904 +    /* V.22 George Davis asked for an override switch to force it to list everything */
  23.905 +    if (pswit[VERBOSE_SWITCH]) {
  23.906 +        warn_bin = 1;
  23.907 +        warn_short = 1;
  23.908 +        warn_dotcomma = 1;
  23.909 +        warn_long = 1;
  23.910 +        warn_dash = 1;
  23.911 +        warn_digit = 1;
  23.912 +        warn_ast = 1;
  23.913 +        warn_fslash = 1;
  23.914 +        warn_hyphen = 1;
  23.915 +        warn_endquote = 1;
  23.916 +        printf("   *** Verbose output is ON -- you asked for it! ***\n");
  23.917 +        }
  23.918 +
  23.919 +    if (isDutch)
  23.920 +        warn_dash = 0;  /* Frank suggested turning it REALLY off for Dutch */
  23.921 +
  23.922 +    if ((infile = fopen(filename, "rb")) == NULL) {
  23.923 +        if (pswit[STDOUT_SWITCH])
  23.924 +            fprintf(stdout, "gutcheck: cannot open %s\n", filename);
  23.925 +        else
  23.926 +            fprintf(stderr, "gutcheck: cannot open %s\n", filename);
  23.927 +        exit(1);
  23.928 +        }
  23.929 +
  23.930 +    if (footerline > 0 && firstline > 0 && footerline > firstline && footerline - firstline < 100) { /* ugh */
  23.931 +        printf("   --> I don't really know where this text starts. \n");
  23.932 +        printf("       There are no reference points.\n");
  23.933 +        printf("       I'm going to have to report the header and footer as well.\n");
  23.934 +        firstline=0;
  23.935 +        }
  23.936 +        
  23.937 +
  23.938 +
  23.939 +    /*****************************************************/
  23.940 +    /*                                                   */
  23.941 +    /* Here we go with the main pass. Hold onto yer hat! */
  23.942 +    /*                                                   */
  23.943 +    /*****************************************************/
  23.944 +
  23.945 +    /* Re-init some variables we've dirtied */
  23.946 +    quot = squot = linecnt = 0;
  23.947 +    laststart = CHAR_SPACE;
  23.948 +    lastlen = lastblen = 0;
  23.949 +
  23.950 +    while (flgets(aline, LINEBUFSIZE-1, infile, linecnt+1)) {
  23.951 +        linecnt++;
  23.952 +        if (linecnt == 1) isnewpara = 1;
  23.953 +        if (pswit[DP_SWITCH])
  23.954 +            if (!strncmp(aline, "-----File: ", 11))
  23.955 +                continue;    // skip DP page separators completely
  23.956 +        if (linecnt < firstline || (footerline > 0 && linecnt > footerline)) {
  23.957 +            if (pswit[HEADER_SWITCH]) {
  23.958 +                if (!strncmp(aline, "Title:", 6))
  23.959 +                    printf("    %s\n", aline);
  23.960 +                if (!strncmp (aline, "Author:", 7))
  23.961 +                    printf("    %s\n", aline);
  23.962 +                if (!strncmp(aline, "Release Date:", 13))
  23.963 +                    printf("    %s\n", aline);
  23.964 +                if (!strncmp(aline, "Edition:", 8))
  23.965 +                    printf("    %s\n\n", aline);
  23.966 +                }
  23.967 +            continue;                /* skip through the header */
  23.968 +            }
  23.969 +        checked_linecnt++;
  23.970 +        s = aline;
  23.971 +        isemptyline = 1;      /* assume the line is empty until proven otherwise */
  23.972 +
  23.973 +        /* If we are in a state of unbalanced quotes, and this line    */
  23.974 +        /* doesn't begin with a quote, output the stored error message */
  23.975 +        /* If the -P switch was used, print the warning even if the    */
  23.976 +        /* new para starts with quotes                                 */
  23.977 +        /* Version .20 - if the new paragraph does start with a quote, */
  23.978 +        /* but is indented, I was giving a spurious error. Need to     */
  23.979 +        /* check the first _non-space_ character on the line rather    */
  23.980 +        /* than the first character when deciding whether the para     */
  23.981 +        /* starts with a quote. Using *t for this.                     */
  23.982 +        t = s;
  23.983 +        while (*t == ' ') t++;
  23.984 +        if (*dquote_err)
  23.985 +            if (*t != CHAR_DQUOTE || pswit[QPARA_SWITCH]) {
  23.986 +                if (!pswit[OVERVIEW_SWITCH]) {
  23.987 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
  23.988 +                    printf(dquote_err);
  23.989 +                    }
  23.990 +                else
  23.991 +                    cnt_dquot++;
  23.992 +            }
  23.993 +        if (*squote_err) {
  23.994 +            if (*t != CHAR_SQUOTE && *t != CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] || squot) {
  23.995 +                if (!pswit[OVERVIEW_SWITCH]) {
  23.996 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
  23.997 +                    printf(squote_err);
  23.998 +                    }
  23.999 +                else
 23.1000 +                    cnt_squot++;
 23.1001 +                }
 23.1002 +            squot = 0;
 23.1003 +            }
 23.1004 +        if (*rbrack_err) {
 23.1005 +            if (!pswit[OVERVIEW_SWITCH]) {
 23.1006 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 23.1007 +                printf(rbrack_err);
 23.1008 +                }
 23.1009 +            else
 23.1010 +                cnt_brack++;
 23.1011 +            }
 23.1012 +        if (*sbrack_err) {
 23.1013 +            if (!pswit[OVERVIEW_SWITCH]) {
 23.1014 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 23.1015 +                printf(sbrack_err);
 23.1016 +                }
 23.1017 +            else
 23.1018 +                cnt_brack++;
 23.1019 +            }
 23.1020 +        if (*cbrack_err) {
 23.1021 +            if (!pswit[OVERVIEW_SWITCH]) {
 23.1022 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 23.1023 +                printf(cbrack_err);
 23.1024 +                }
 23.1025 +            else
 23.1026 +                cnt_brack++;
 23.1027 +            }
 23.1028 +        if (*unders_err) {
 23.1029 +            if (!pswit[OVERVIEW_SWITCH]) {
 23.1030 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 23.1031 +                printf(unders_err);
 23.1032 +                }
 23.1033 +            else
 23.1034 +                cnt_brack++;
 23.1035 +            }
 23.1036 +
 23.1037 +        *dquote_err = *squote_err = *rbrack_err = *cbrack_err = 
 23.1038 +            *sbrack_err = *unders_err = 0;
 23.1039 +
 23.1040 +
 23.1041 +        /* look along the line, accumulate the count of quotes, and see */
 23.1042 +        /* if this is an empty line - i.e. a line with nothing on it    */
 23.1043 +        /* but spaces.                                                  */
 23.1044 +        /* V .12 also if line has just spaces, * and/or - on it, don't  */
 23.1045 +        /* count it, since empty lines with asterisks or dashes to      */
 23.1046 +        /* separate sections are common.                                */
 23.1047 +        /* V .15 new single-quote checking - has to be better than the  */
 23.1048 +        /* previous version, but how much better? fingers crossed!      */
 23.1049 +        /* V .20 add period to * and - as characters on a separator line*/
 23.1050 +        s = aline;
 23.1051 +        while (*s) {
 23.1052 +            if (*s == CHAR_DQUOTE) quot++;
 23.1053 +            if (*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
 23.1054 +                if (s == aline) { /* at start of line, it can only be an openquote */
 23.1055 +                    if (strncmp(s+2, "tis", 3) && strncmp(s+2, "Tis", 3)) /* hardcode a very common exception! */
 23.1056 +                        open_single_quote++;
 23.1057 +                    }
 23.1058 +                else
 23.1059 +                    if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
 23.1060 +                        ; /* do nothing! - it's definitely an apostrophe, not a quote */
 23.1061 +                    else        /* it's outside a word - let's check it out */
 23.1062 +                        if (*s == CHAR_OPEN_SQUOTE || gcisalpha(*(s+1))) { /* it damwell better BE an openquote */
 23.1063 +                            if (strncmp(s+1, "tis", 3) && strncmp(s+1, "Tis", 3)) /* hardcode a very common exception! */
 23.1064 +                                open_single_quote++;
 23.1065 +                            }
 23.1066 +                        else { /* now - is it a closequote? */
 23.1067 +                            guessquote = 0;   /* accumulate clues */
 23.1068 +                            if (gcisalpha(*(s-1))) { /* it follows a letter - could be either */
 23.1069 +                                guessquote += 1;
 23.1070 +                                if (*(s-1) == 's') { /* looks like a plural apostrophe */
 23.1071 +                                    guessquote -= 3;
 23.1072 +                                    if (*(s+1) == CHAR_SPACE)  /* bonus marks! */
 23.1073 +                                        guessquote -= 2;
 23.1074 +                                    }
 23.1075 +                                }
 23.1076 +                            else /* it doesn't have a letter either side */
 23.1077 +                                if (strchr(".?!,;:", *(s-1)) && (strchr(".?!,;: ", *(s+1))))
 23.1078 +                                    guessquote += 8; /* looks like a closequote */
 23.1079 +                                else
 23.1080 +                                    guessquote += 1;
 23.1081 +                            if (open_single_quote > close_single_quote)
 23.1082 +                                guessquote += 1; /* give it the benefit of some doubt - if a squote is already open */
 23.1083 +                            else
 23.1084 +                                guessquote -= 1;
 23.1085 +                            if (guessquote >= 0)
 23.1086 +                                close_single_quote++;
 23.1087 +                            }
 23.1088 +
 23.1089 +            if (*s != CHAR_SPACE
 23.1090 +                && *s != '-'
 23.1091 +                && *s != '.'
 23.1092 +                && *s != CHAR_ASTERISK
 23.1093 +                && *s != 13
 23.1094 +                && *s != 10) isemptyline = 0;  /* ignore lines like  *  *  *  as spacers */
 23.1095 +            if (*s == CHAR_UNDERSCORE) c_unders++;
 23.1096 +            if (*s == CHAR_OPEN_CBRACK) c_brack++;
 23.1097 +            if (*s == CHAR_CLOSE_CBRACK) c_brack--;
 23.1098 +            if (*s == CHAR_OPEN_RBRACK) r_brack++;
 23.1099 +            if (*s == CHAR_CLOSE_RBRACK) r_brack--;
 23.1100 +            if (*s == CHAR_OPEN_SBRACK) s_brack++;
 23.1101 +            if (*s == CHAR_CLOSE_SBRACK) s_brack--;
 23.1102 +            s++;
 23.1103 +            }
 23.1104 +
 23.1105 +        if (isnewpara && !isemptyline) {   /* This line is the start of a new paragraph */
 23.1106 +            start_para_line = linecnt;
 23.1107 +            strncpy(parastart, aline, 80); /* Capture its first line in case we want to report it later */
 23.1108 +            parastart[79] = 0;
 23.1109 +            dquotepar = squotepar = 0; /* restart the quote count 0.98 */
 23.1110 +            s = aline;
 23.1111 +            while (!gcisalpha(*s) && !gcisdigit(*s) && *s) s++;    /* V.97 fixed bug - overran line and gave false warning - rare */
 23.1112 +            if (*s >= 'a' && *s <='z') { /* and its first letter is lowercase */
 23.1113 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1114 +                if (!pswit[OVERVIEW_SWITCH])
 23.1115 +                    printf("    Line %ld column %d - Paragraph starts with lower-case\n", linecnt, (int)(s - aline) +1);
 23.1116 +                else
 23.1117 +                    cnt_punct++;
 23.1118 +                }
 23.1119 +            isnewpara = 0; /* Signal the end of new para processing */
 23.1120 +            }
 23.1121 +
 23.1122 +        /* Check for an em-dash broken at line end */
 23.1123 +        if (enddash && *aline == '-') {
 23.1124 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1125 +            if (!pswit[OVERVIEW_SWITCH])
 23.1126 +                printf("    Line %ld column 1 - Broken em-dash?\n", linecnt);
 23.1127 +            else
 23.1128 +                cnt_punct++;
 23.1129 +            }
 23.1130 +        enddash = 0;
 23.1131 +        for (s = aline + strlen(aline) - 1; *s == ' ' && s > aline; s--);
 23.1132 +        if (s >= aline && *s == '-')
 23.1133 +            enddash = 1;
 23.1134 +            
 23.1135 +
 23.1136 +        /* Check for invalid or questionable characters in the line */
 23.1137 +        /* Anything above 127 is invalid for plain ASCII,  and      */
 23.1138 +        /* non-printable control characters should also be flagged. */
 23.1139 +        /* Tabs should generally not be there.                      */
 23.1140 +        /* Jan 06, in 0.99: Hm. For some strange reason, I either   */
 23.1141 +        /* never created or deleted the check for unprintable       */
 23.1142 +        /* control characters. They should be reported even if      */
 23.1143 +        /* warn_bin is on, I think, and in full.                    */
 23.1144 +
 23.1145 +        for (s = aline; *s; s++) {
 23.1146 +            i = (unsigned char) *s;
 23.1147 +            if (i < CHAR_SPACE && i != CHAR_LF && i != CHAR_CR && i != CHAR_TAB) {
 23.1148 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1149 +                if (!pswit[OVERVIEW_SWITCH])
 23.1150 +                    printf("    Line %ld column %d - Control character %d\n", linecnt, (int) (s - aline) + 1, i);
 23.1151 +                else
 23.1152 +                    cnt_bin++;
 23.1153 +                }
 23.1154 +            }
 23.1155 +
 23.1156 +        if (warn_bin) {
 23.1157 +            eNon_A = eTab = eTilde = eCarat = eFSlash = eAst = 0;  /* don't repeat multiple warnings on one line */
 23.1158 +            for (s = aline; *s; s++) {
 23.1159 +                if (!eNon_A && ((*s < CHAR_SPACE && *s != 9 && *s != '\n') || (unsigned char)*s > 127)) {
 23.1160 +                    i = *s;                           /* annoying kludge for signed chars */
 23.1161 +                    if (i < 0) i += 256;
 23.1162 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1163 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1164 +                        if (i > 127 && i < 160)
 23.1165 +                            printf("    Line %ld column %d - Non-ISO-8859 character %d\n", linecnt, (int) (s - aline) + 1, i);
 23.1166 +                        else
 23.1167 +                            printf("    Line %ld column %d - Non-ASCII character %d\n", linecnt, (int) (s - aline) + 1, i);
 23.1168 +                    else
 23.1169 +                        cnt_bin++;
 23.1170 +                    eNon_A = 1;
 23.1171 +                    }
 23.1172 +                if (!eTab && *s == CHAR_TAB) {
 23.1173 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1174 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1175 +                        printf("    Line %ld column %d - Tab character?\n", linecnt, (int) (s - aline) + 1);
 23.1176 +                    else
 23.1177 +                        cnt_odd++;
 23.1178 +                    eTab = 1;
 23.1179 +                    }
 23.1180 +                if (!eTilde && *s == CHAR_TILDE) {  /* often used by OCR software to indicate an unrecognizable character */
 23.1181 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1182 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1183 +                        printf("    Line %ld column %d - Tilde character?\n", linecnt, (int) (s - aline) + 1);
 23.1184 +                    else
 23.1185 +                        cnt_odd++;
 23.1186 +                    eTilde = 1;
 23.1187 +                    }
 23.1188 +                if (!eCarat && *s == CHAR_CARAT) {  
 23.1189 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1190 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1191 +                        printf("    Line %ld column %d - Carat character?\n", linecnt, (int) (s - aline) + 1);
 23.1192 +                    else
 23.1193 +                        cnt_odd++;
 23.1194 +                    eCarat = 1;
 23.1195 +                    }
 23.1196 +                if (!eFSlash && *s == CHAR_FORESLASH && warn_fslash) {  
 23.1197 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1198 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1199 +                        printf("    Line %ld column %d - Forward slash?\n", linecnt, (int) (s - aline) + 1);
 23.1200 +                    else
 23.1201 +                        cnt_odd++;
 23.1202 +                    eFSlash = 1;
 23.1203 +                    }
 23.1204 +                /* report asterisks only in paranoid mode, since they're often deliberate */
 23.1205 +                if (!eAst && pswit[PARANOID_SWITCH] && warn_ast && !isemptyline && *s == CHAR_ASTERISK) {
 23.1206 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1207 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1208 +                        printf("    Line %ld column %d - Asterisk?\n", linecnt, (int) (s - aline) + 1);
 23.1209 +                    else
 23.1210 +                        cnt_odd++;
 23.1211 +                    eAst = 1;
 23.1212 +                    }
 23.1213 +                }
 23.1214 +            }
 23.1215 +
 23.1216 +        /* Check for line too long */
 23.1217 +        if (warn_long) {
 23.1218 +            if (strlen(aline) > LONGEST_PG_LINE) {
 23.1219 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1220 +                if (!pswit[OVERVIEW_SWITCH])
 23.1221 +                    printf("    Line %ld column %d - Long line %d\n", linecnt, strlen(aline), strlen(aline));
 23.1222 +                else
 23.1223 +                    cnt_long++;
 23.1224 +                }
 23.1225 +            }
 23.1226 +
 23.1227 +        /* Check for line too short.                                     */
 23.1228 +        /* This one is a bit trickier to implement: we don't want to     */
 23.1229 +        /* flag the last line of a paragraph for being short, so we      */
 23.1230 +        /* have to wait until we know that our current line is a         */
 23.1231 +        /* "normal" line, then report the _previous_ line if it was too  */
 23.1232 +        /* short. We also don't want to report indented lines like       */
 23.1233 +        /* chapter heads or formatted quotations. We therefore keep      */
 23.1234 +        /* lastlen as the length of the last line examined, and          */
 23.1235 +        /* lastblen as the length of the last but one, and try to        */
 23.1236 +        /* suppress unnecessary warnings by checking that both were of   */
 23.1237 +        /* "normal" length. We keep the first character of the last      */
 23.1238 +        /* line in laststart, and if it was a space, we assume that the  */
 23.1239 +        /* formatting is deliberate. I can't figure out a way to         */
 23.1240 +        /* distinguish something like a quoted verse left-aligned or     */
 23.1241 +        /* the header or footer of a letter from a paragraph of short    */
 23.1242 +        /* lines - maybe if I examined the whole paragraph, and if the   */
 23.1243 +        /* para has less than, say, 8 lines and if all lines are short,  */
 23.1244 +        /* then just assume it's OK? Need to look at some texts to see   */
 23.1245 +        /* how often a formula like this would get the right result.     */
 23.1246 +        /* V0.99 changed the tolerance for length to ignore from 2 to 1  */
 23.1247 +        if (warn_short) {
 23.1248 +            if (strlen(aline) > 1
 23.1249 +                && lastlen > 1 && lastlen < SHORTEST_PG_LINE
 23.1250 +                && lastblen > 1 && lastblen > SHORTEST_PG_LINE
 23.1251 +                && laststart != CHAR_SPACE) {
 23.1252 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
 23.1253 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1254 +                        printf("    Line %ld column %d - Short line %d?\n", linecnt-1, strlen(prevline), strlen(prevline));
 23.1255 +                    else
 23.1256 +                        cnt_short++;
 23.1257 +                    }
 23.1258 +            }
 23.1259 +        lastblen = lastlen;
 23.1260 +        lastlen = strlen(aline);
 23.1261 +        laststart = aline[0];
 23.1262 +
 23.1263 +        /* look for punctuation at start of line */
 23.1264 +        if  (*aline && strchr(".?!,;:",  aline[0]))  {            /* if it's punctuation */
 23.1265 +            if (strncmp(". . .", aline, 5)) {   /* exception for ellipsis: V.98 tightened up to except only a full ellipsis */
 23.1266 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1267 +                if (!pswit[OVERVIEW_SWITCH])
 23.1268 +                    printf("    Line %ld column 1 - Begins with punctuation?\n", linecnt);
 23.1269 +                else
 23.1270 +                    cnt_punct++;
 23.1271 +                }
 23.1272 +            }
 23.1273 +
 23.1274 +        /* Check for spaced em-dashes                            */
 23.1275 +        /* V.20 must check _all_ occurrences of "--" on the line */
 23.1276 +        /* hence the loop - even if the first double-dash is OK  */
 23.1277 +        /* there may be another that's wrong later on.           */
 23.1278 +        if (warn_dash) {
 23.1279 +            s = aline;
 23.1280 +            while (strstr(s,"--")) {
 23.1281 +                if (*(strstr(s, "--")-1) == CHAR_SPACE ||
 23.1282 +                   (*(strstr(s, "--")+2) == CHAR_SPACE)) {
 23.1283 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1284 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1285 +                        printf("    Line %ld column %d - Spaced em-dash?\n", linecnt, (int) (strstr(s,"--") - aline) + 1);
 23.1286 +                    else
 23.1287 +                        cnt_dash++;
 23.1288 +                    }
 23.1289 +                s = strstr(s,"--") + 2;
 23.1290 +                }
 23.1291 +            }
 23.1292 +
 23.1293 +        /* Check for spaced dashes */
 23.1294 +        if (warn_dash)
 23.1295 +            if (strstr(aline," -")) {
 23.1296 +                if (*(strstr(aline, " -")+2) != '-') {
 23.1297 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1298 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1299 +                        printf("    Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline," -") - aline) + 1);
 23.1300 +                    else
 23.1301 +                        cnt_dash++;
 23.1302 +                    }
 23.1303 +                }
 23.1304 +            else
 23.1305 +                if (strstr(aline,"- ")) {
 23.1306 +                    if (*(strstr(aline, "- ")-1) != '-') {
 23.1307 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1308 +                        if (!pswit[OVERVIEW_SWITCH])
 23.1309 +                            printf("    Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline,"- ") - aline) + 1);
 23.1310 +                        else
 23.1311 +                            cnt_dash++;
 23.1312 +                        }
 23.1313 +                    }
 23.1314 +
 23.1315 +        /* v 0.99                                                       */
 23.1316 +        /* Check for unmarked paragraphs indicated by separate speakers */
 23.1317 +        /* May well be false positive:                                  */
 23.1318 +        /* "Bravo!" "Wonderful!" called the crowd.                      */
 23.1319 +        /* but useful all the same.                                     */
 23.1320 +        s = wrk;
 23.1321 +        *s = 0;
 23.1322 +        if (strstr(aline, "\" \"")) s = strstr(aline, "\" \"");
 23.1323 +        if (strstr(aline, "\"  \"")) s = strstr(aline, "\"  \"");
 23.1324 +        if (*s) {
 23.1325 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1326 +            if (!pswit[OVERVIEW_SWITCH])
 23.1327 +                printf("    Line %ld column %d - Query missing paragraph break?\n", linecnt, (int)(s - aline) +1);
 23.1328 +            else
 23.1329 +                cnt_punct++;
 23.1330 +            }
 23.1331 +
 23.1332 +
 23.1333 +
 23.1334 +        /* Check for "to he" and other easy he/be errors          */
 23.1335 +        /* This is a very inadequate effort on the he/be problem, */
 23.1336 +        /* but the phrase "to he" is always an error, whereas "to */
 23.1337 +        /* be" is quite common. I chuckle when it does catch one! */
 23.1338 +        /* Similarly, '"Quiet!", be said.' is a non-be error      */
 23.1339 +        /* V .18 - "to he" is _not_ always an error!:             */
 23.1340 +        /*           "Where they went to he couldn't say."        */
 23.1341 +        /* but I'm leaving it in anyway.                          */
 23.1342 +        /* V .20 Another false positive:                          */
 23.1343 +        /*       What would "Cinderella" be without the . . .     */
 23.1344 +        /* and another "If he wants to he can see for himself."   */
 23.1345 +        /* V .21 Added " is be " and " be is " and " be was "     */
 23.1346 +        /* V .99 Added jeebies code -- removed again.             */
 23.1347 +        /*       Is jeebies code worth adding? Rare to see he/be  */
 23.1348 +        /*       errors with modern OCR. Separate program? Yes!   */
 23.1349 +        /*       jeebies does the job without cluttering up this. */
 23.1350 +        /*       We do get a few more queryable pairs from the    */
 23.1351 +        /*       project though -- they're cheap to implement.    */
 23.1352 +        /*       Also added a column number for guiguts.          */
 23.1353 +
 23.1354 +        s = wrk;
 23.1355 +        *s = 0;
 23.1356 +        if (strstr(aline," to he ")) s = strstr(aline," to he ");
 23.1357 +        if (strstr(aline,"\" be ")) s = strstr(aline,"\" be ");
 23.1358 +        if (strstr(aline,"\", be ")) s = strstr(aline,"\", be ");
 23.1359 +        if (strstr(aline," is be ")) s = strstr(aline," is be ");
 23.1360 +        if (strstr(aline," be is ")) s = strstr(aline," be is ");
 23.1361 +        if (strstr(aline," was be ")) s = strstr(aline," was be ");
 23.1362 +        if (strstr(aline," be would ")) s = strstr(aline," be would ");
 23.1363 +        if (strstr(aline," be could ")) s = strstr(aline," be could ");
 23.1364 +        if (*s) {
 23.1365 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1366 +            if (!pswit[OVERVIEW_SWITCH])
 23.1367 +                printf("    Line %ld column %d - Query he/be error?\n", linecnt, (int)(s - aline) +1);
 23.1368 +            else
 23.1369 +                cnt_word++;
 23.1370 +            }
 23.1371 +
 23.1372 +        s = wrk;
 23.1373 +        *s = 0;
 23.1374 +        if (strstr(aline," i bad ")) s = strstr(aline," i bad ");
 23.1375 +        if (strstr(aline," you bad ")) s = strstr(aline," you bad ");
 23.1376 +        if (strstr(aline," he bad ")) s = strstr(aline," he bad ");
 23.1377 +        if (strstr(aline," she bad ")) s = strstr(aline," she bad ");
 23.1378 +        if (strstr(aline," they bad ")) s = strstr(aline," they bad ");
 23.1379 +        if (strstr(aline," a had ")) s = strstr(aline," a had ");
 23.1380 +        if (strstr(aline," the had ")) s = strstr(aline," the had ");
 23.1381 +        if (*s) {
 23.1382 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1383 +            if (!pswit[OVERVIEW_SWITCH])
 23.1384 +                printf("    Line %ld column %d - Query had/bad error?\n", linecnt, (int)(s - aline) +1);
 23.1385 +            else
 23.1386 +                cnt_word++;
 23.1387 +            }
 23.1388 +
 23.1389 +
 23.1390 +        /* V .97 Added ", hut "  Not too common, hut pretty certain   */
 23.1391 +        /* V.99 changed to add a column number for guiguts            */
 23.1392 +        s = wrk;
 23.1393 +        *s = 0;
 23.1394 +        if (strstr(aline,", hut ")) s = strstr(aline,", hut ");
 23.1395 +        if (strstr(aline,"; hut ")) s = strstr(aline,"; hut ");
 23.1396 +        if (*s) {
 23.1397 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1398 +            if (!pswit[OVERVIEW_SWITCH])
 23.1399 +                printf("    Line %ld column %d - Query hut/but error?\n", linecnt, (int)(s - aline) +1);
 23.1400 +            else
 23.1401 +                cnt_word++;
 23.1402 +            }
 23.1403 +
 23.1404 +        /* Special case - angled bracket in front of "From" placed there by an MTA */
 23.1405 +        /* when sending an e-mail.  V .21                                          */
 23.1406 +        if (strstr(aline, ">From")) {
 23.1407 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1408 +            if (!pswit[OVERVIEW_SWITCH])
 23.1409 +                printf("    Line %ld column %d - Query angled bracket with From\n", linecnt, (int)(strstr(aline, ">From") - aline) +1);
 23.1410 +            else
 23.1411 +                cnt_punct++;
 23.1412 +            }
 23.1413 +
 23.1414 +        /* V 0.98 Check for a single character line - often an overflow from bad wrapping. */
 23.1415 +        if (*aline && !*(aline+1)) {
 23.1416 +            if (*aline == 'I' || *aline == 'V' || *aline == 'X' || *aline == 'L' || gcisdigit(*aline))
 23.1417 +                ; /* nothing - ignore numerals alone on a line. */
 23.1418 +            else {
 23.1419 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1420 +                if (!pswit[OVERVIEW_SWITCH])
 23.1421 +                    printf("    Line %ld column 1 - Query single character line\n", linecnt);
 23.1422 +                else
 23.1423 +                    cnt_punct++;
 23.1424 +                }
 23.1425 +            }
 23.1426 +
 23.1427 +        /* V 0.98 Check for I" - often should be ! */
 23.1428 +        if (strstr(aline, " I\"")) {
 23.1429 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1430 +            if (!pswit[OVERVIEW_SWITCH])
 23.1431 +                printf("    Line %ld column %ld - Query I=exclamation mark?\n", linecnt, strstr(aline, " I\"") - aline);
 23.1432 +            else
 23.1433 +                cnt_punct++;
 23.1434 +            }
 23.1435 +
 23.1436 +        /* V 0.98 Check for period without a capital letter. Cut-down from gutspell */
 23.1437 +        /*        Only works when it happens on a single line.                      */
 23.1438 +
 23.1439 +        if (pswit[PARANOID_SWITCH])
 23.1440 +            for (t = s = aline; strstr(t,". ");) {
 23.1441 +                t = strstr(t, ". ");
 23.1442 +                if (t == s)  {
 23.1443 +                    t++;
 23.1444 +                    continue; /* start of line punctuation is handled elsewhere */
 23.1445 +                    }
 23.1446 +                if (!gcisalpha(*(t-1))) {
 23.1447 +                    t++;
 23.1448 +                    continue;
 23.1449 +                    }
 23.1450 +                if (isDutch) {  /* For Frank & Jeroen -- 's Middags case */
 23.1451 +                    if (*(t+2) == CHAR_SQUOTE &&
 23.1452 +                      *(t+3)>='a' && *(t+3)<='z' &&
 23.1453 +                      *(t+4) == CHAR_SPACE &&
 23.1454 +                      *(t+5)>='A' && *(t+5)<='Z') {
 23.1455 +                        t++;
 23.1456 +                        continue;
 23.1457 +                        }
 23.1458 +                      }
 23.1459 +                s1 = t+2;
 23.1460 +                while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))
 23.1461 +                    s1++;
 23.1462 +                if (*s1 >= 'a' && *s1 <= 'z') {  /* we have something to investigate */
 23.1463 +                    istypo = 1;
 23.1464 +                    for (s1 = t - 1; s1 >= s && 
 23.1465 +                        (gcisalpha(*s1) || gcisdigit(*s1) || 
 23.1466 +                        (*s1 == CHAR_SQUOTE && gcisalpha(*(s1+1)) && gcisalpha(*(s1-1)))); s1--); /* so let's go back and find out */
 23.1467 +                    s1++;
 23.1468 +                    for (i = 0; *s1 && *s1 != '.'; s1++, i++)
 23.1469 +                        testword[i] = *s1;
 23.1470 +                    testword[i] = 0;
 23.1471 +                    for (i = 0; *abbrev[i]; i++)
 23.1472 +                        if (!strcmp(testword, abbrev[i]))
 23.1473 +                            istypo = 0;
 23.1474 +//                    if (*testword >= 'A' && *testword <= 'Z') 
 23.1475 +//                        istypo = 0;
 23.1476 +                    if (gcisdigit(*testword)) istypo = 0;
 23.1477 +                    if (!*(testword+1)) istypo = 0;
 23.1478 +                    if (isroman(testword)) istypo = 0;
 23.1479 +                    if (istypo) {
 23.1480 +                        istypo = 0;
 23.1481 +                        for (i = 0; testword[i]; i++)
 23.1482 +                            if (strchr(vowels, testword[i]))
 23.1483 +                                istypo = 1;
 23.1484 +                        }
 23.1485 +                    if (istypo) {
 23.1486 +                        isdup = 0;
 23.1487 +                        if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
 23.1488 +                            for (i = 0; i < qperiod_index; i++)
 23.1489 +                                if (!strcmp(testword, qperiod[i])) {
 23.1490 +                                    isdup = 1;
 23.1491 +                                    }
 23.1492 +                        if (!isdup) {
 23.1493 +                            if (qperiod_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
 23.1494 +                                strcpy(qperiod[qperiod_index], testword);
 23.1495 +                                qperiod_index++;
 23.1496 +                                }
 23.1497 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1498 +                            if (!pswit[OVERVIEW_SWITCH])
 23.1499 +                                printf("    Line %ld column %d - Extra period?\n", linecnt, (int)(t - aline)+1);
 23.1500 +                            else
 23.1501 +                                cnt_punct++;
 23.1502 +                            }
 23.1503 +                        }
 23.1504 +                    }
 23.1505 +                t++;
 23.1506 +                }
 23.1507 +
 23.1508 +
 23.1509 +        if (pswit[TYPO_SWITCH]) {    /* Should have put this condition in at the start of 0.99. Duh! */
 23.1510 +            /* Check for words usually not followed by punctuation 0.99 */
 23.1511 +            for (s = aline; *s;) {
 23.1512 +                wordstart = s;
 23.1513 +                s = getaword(s, inword);
 23.1514 +                if (!*inword) continue;
 23.1515 +                lowerit(inword);
 23.1516 +                for (i = 0; *nocomma[i]; i++)
 23.1517 +                    if (!strcmp(inword, nocomma[i])) {
 23.1518 +                        if (*s == ',' || *s == ';' || *s == ':') {
 23.1519 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1520 +                            if (!pswit[OVERVIEW_SWITCH])
 23.1521 +                                printf("    Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
 23.1522 +                            else
 23.1523 +                                cnt_punct++;
 23.1524 +                            }
 23.1525 +                        }
 23.1526 +                for (i = 0; *noperiod[i]; i++)
 23.1527 +                    if (!strcmp(inword, noperiod[i])) {
 23.1528 +                        if (*s == '.' || *s == '!') {
 23.1529 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1530 +                            if (!pswit[OVERVIEW_SWITCH])
 23.1531 +                                printf("    Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
 23.1532 +                            else
 23.1533 +                                cnt_punct++;
 23.1534 +                            }
 23.1535 +                        }
 23.1536 +                }
 23.1537 +            }
 23.1538 +
 23.1539 +
 23.1540 +
 23.1541 +        /* Check for commonly mistyped words, and digits like 0 for O in a word */
 23.1542 +        for (s = aline; *s;) {
 23.1543 +            wordstart = s;
 23.1544 +            s = getaword(s, inword);
 23.1545 +            if (!*inword) continue; /* skip when getaword produced no word */
 23.1546 +            if (mixdigit(inword)) {
 23.1547 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1548 +                if (!pswit[OVERVIEW_SWITCH])
 23.1549 +                    printf("    Line %ld column %ld - Query digit in %s\n", linecnt, (int)(wordstart - aline) + 1, inword);
 23.1550 +                else
 23.1551 +                    cnt_word++;
 23.1552 +                }
 23.1553 +
 23.1554 +            /* put the word through a series of tests for likely typos and OCR errors */
 23.1555 +            /* V.21 I had allowed lots of typo-checking even with the typo switch     */
 23.1556 +            /* turned off, but I really should disallow reporting of them when        */
 23.1557 +            /* the switch is off. Hence the "if" below.                               */
 23.1558 +            if (pswit[TYPO_SWITCH]) {
 23.1559 +                istypo = 0;
 23.1560 +                strcpy(testword, inword);
 23.1561 +                alower = 0;
 23.1562 +                for (i = 0; i < (signed int)strlen(testword); i++) { /* lowercase for testing */
 23.1563 +                    if (testword[i] >= 'a' && testword[i] <= 'z') alower = 1;
 23.1564 +                    if (alower && testword[i] >= 'A' && testword[i] <= 'Z') {
 23.1565 +                        /* we have an uppercase mid-word. However, there are common cases: */
 23.1566 +                        /*   Mac and Mc like McGill                                        */
 23.1567 +                        /*   French contractions like l'Abbe                               */
 23.1568 +                        if ((i == 2 && testword[0] == 'm' && testword[1] == 'c') ||
 23.1569 +                            (i == 3 && testword[0] == 'm' && testword[1] == 'a' && testword[2] == 'c') ||
 23.1570 +                            (i > 0 && testword[i-1] == CHAR_SQUOTE))
 23.1571 +                                ; /* do nothing! */
 23.1572 +
 23.1573 +                        else {  /* V.97 - remove separate case of uppercase within word so that         */
 23.1574 +                                /* names like VanAllen fall into qword_index and get reported only once */
 23.1575 +                            istypo = 1;
 23.1576 +                            }
 23.1577 +                        }
 23.1578 +                    testword[i] = (char)tolower(testword[i]);
 23.1579 +                    }
 23.1580 +
 23.1581 +                /* check for certain unlikely two-letter combinations at word start and end */
 23.1582 +                /* V.0.97 - this replaces individual hardcoded checks in previous versions */
 23.1583 +                if (strlen(testword) > 1) {
 23.1584 +                    for (i = 0; *nostart[i]; i++)
 23.1585 +                        if (!strncmp(testword, nostart[i], 2))
 23.1586 +                            istypo = 1;
 23.1587 +                    for (i = 0; *noend[i]; i++)
 23.1588 +                        if (!strncmp(testword + strlen(testword) -2, noend[i], 2))
 23.1589 +                            istypo = 1;
 23.1590 +                    }
 23.1591 +
 23.1592 +
 23.1593 +                /* ght is common, gbt never. Like that. */
 23.1594 +                if (strstr(testword, "cb")) istypo = 1;
 23.1595 +                if (strstr(testword, "gbt")) istypo = 1;
 23.1596 +                if (strstr(testword, "pbt")) istypo = 1;
 23.1597 +                if (strstr(testword, "tbs")) istypo = 1;
 23.1598 +                if (strstr(testword, "mrn")) istypo = 1;
 23.1599 +                if (strstr(testword, "ahle")) istypo = 1;
 23.1600 +                if (strstr(testword, "ihle")) istypo = 1;
 23.1601 +
 23.1602 +                /* "TBE" does happen - like HEARTBEAT - but uncommon.                    */
 23.1603 +                /*  Also "TBI" - frostbite, outbid - but uncommon.                       */
 23.1604 +                /*  Similarly "ii" like Hawaii, or Pompeii, and in Roman numerals,       */
 23.1605 +                /*  but these are covered in V.20. "ii" is a common scanno.              */
 23.1606 +                if (strstr(testword, "tbi")) istypo = 1;
 23.1607 +                if (strstr(testword, "tbe")) istypo = 1;
 23.1608 +                if (strstr(testword, "ii")) istypo = 1;
 23.1609 +
 23.1610 +                /* check for no vowels or no consonants. */
 23.1611 +                /* If none, flag a typo                  */
 23.1612 +                if (!istypo && strlen(testword)>1) {
 23.1613 +                    vowel = consonant = 0;
 23.1614 +                    for (i = 0; testword[i]; i++)
 23.1615 +                        if (testword[i] == 'y' || gcisdigit(testword[i])) {  /* Yah, this is loose. */
 23.1616 +                            vowel++;
 23.1617 +                            consonant++;
 23.1618 +                            }
 23.1619 +                        else
 23.1620 +                            if  (strchr(vowels, testword[i])) vowel++;
 23.1621 +                            else consonant++;
 23.1622 +                    if (!vowel || !consonant) {
 23.1623 +                        istypo = 1;
 23.1624 +                        }
 23.1625 +                    }
 23.1626 +
 23.1627 +                /* now exclude the word from being reported if it's in */
 23.1628 +                /* the okword list                                     */
 23.1629 +                for (i = 0; *okword[i]; i++)
 23.1630 +                    if (!strcmp(testword, okword[i]))
 23.1631 +                        istypo = 0;
 23.1632 +
 23.1633 +                /* what looks like a typo may be a Roman numeral. Exclude these */
 23.1634 +                if (istypo)
 23.1635 +                    if (isroman(testword))
 23.1636 +                        istypo = 0;
 23.1637 +
 23.1638 +                /* check the manual list of typos */
 23.1639 +                if (!istypo)
 23.1640 +                    for (i = 0; *typo[i]; i++)
 23.1641 +                        if (!strcmp(testword, typo[i]))
 23.1642 +                            istypo = 1;
 23.1643 +
 23.1644 +
 23.1645 +                /* V.21 - check lowercase s and l - special cases */
 23.1646 +                /* V.98 - added "i" and "m"                       */
 23.1647 +                /* V.99 - added "j" often a semi-colon gone wrong */
 23.1648 +                /*      - and "d" for a missing apostrophe - he d */
 23.1649 +                /*      - and "n" for "in"                        */
 23.1650 +                if (!istypo && strlen(testword) == 1)
 23.1651 +                    if (strchr("slmijdn", *inword))
 23.1652 +                        istypo = 1;
 23.1653 +
 23.1654 +
 23.1655 +                if (istypo) {
 23.1656 +                    isdup = 0;
 23.1657 +                    if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
 23.1658 +                        for (i = 0; i < qword_index; i++)
 23.1659 +                            if (!strcmp(testword, qword[i])) {
 23.1660 +                                isdup = 1;
 23.1661 +                                ++dupcnt[i];
 23.1662 +                                }
 23.1663 +                    if (!isdup) {
 23.1664 +                        if (qword_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
 23.1665 +                            strcpy(qword[qword_index], testword);
 23.1666 +                            qword_index++;
 23.1667 +                            }
 23.1668 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1669 +                        if (!pswit[OVERVIEW_SWITCH]) {
 23.1670 +                            printf("    Line %ld column %d - Query word %s", linecnt, (int)(wordstart - aline) + 1, inword);
 23.1671 +                            if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
 23.1672 +                                printf(" - not reporting duplicates");
 23.1673 +                            printf("\n");
 23.1674 +                            }
 23.1675 +                        else
 23.1676 +                            cnt_word++;
 23.1677 +                        }
 23.1678 +                    }
 23.1679 +                }        /* end of typo-checking */
 23.1680 +
 23.1681 +                /* check the user's list of typos */
 23.1682 +                if (!istypo)
 23.1683 +                    if (usertypo_count)
 23.1684 +                        for (i = 0; i < usertypo_count; i++)
 23.1685 +                            if (!strcmp(testword, usertypo[i])) {
 23.1686 +                                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1687 +                                if (!pswit[OVERVIEW_SWITCH])  
 23.1688 +                                    printf("    Line %ld column %d - Query possible scanno %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
 23.1689 +                                }
 23.1690 +
 23.1691 +
 23.1692 +
 23.1693 +            if (pswit[PARANOID_SWITCH] && warn_digit) {   /* in paranoid mode, query all 0 and 1 standing alone - added warn_digit V.97*/
 23.1694 +                if (!strcmp(inword, "0") || !strcmp(inword, "1")) {
 23.1695 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1696 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1697 +                        printf("    Line %ld column %d - Query standalone %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
 23.1698 +                    else
 23.1699 +                        cnt_word++;
 23.1700 +                    }
 23.1701 +                }
 23.1702 +            }
 23.1703 +
 23.1704 +        /* look for added or missing spaces around punctuation and quotes */
 23.1705 +        /* If there is a punctuation character like ! with no space on    */
 23.1706 +        /* either side, suspect a missing!space. If there are spaces on   */
 23.1707 +        /* both sides , assume a typo. If we see a double quote with no   */
 23.1708 +        /* space or punctuation on either side of it, assume unspaced     */
 23.1709 +        /* quotes "like"this.                                             */
 23.1710 +        llen = strlen(aline);
 23.1711 +        for (i = 1; i < llen; i++) {                               /* for each character in the line after the first */
 23.1712 +            if  (strchr(".?!,;:_", aline[i])) {                    /* if it's punctuation */
 23.1713 +                isacro = 0;                       /* we need to suppress warnings for acronyms like M.D. */
 23.1714 +                isellipsis = 0;                   /* we need to suppress warnings for ellipsis . . . */
 23.1715 +                if ( (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) ||     /* if there are letters on both sides of it or ... */
 23.1716 +                   (gcisalpha(aline[i+1]) && strchr("?!,;:", aline[i]))) { /* ...if it's strict punctuation followed by an alpha */
 23.1717 +                    if (aline[i] == '.') {
 23.1718 +                        if (i > 2)
 23.1719 +                            if (aline[i-2] == '.') isacro = 1;
 23.1720 +                        if (i + 2 < llen)
 23.1721 +                            if (aline[i+2] == '.') isacro = 1;
 23.1722 +                        }
 23.1723 +                    if (!isacro) {
 23.1724 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1725 +                        if (!pswit[OVERVIEW_SWITCH])
 23.1726 +                            printf("    Line %ld column %d - Missing space?\n", linecnt, i+1);
 23.1727 +                        else
 23.1728 +                            cnt_punct++;
 23.1729 +                        }
 23.1730 +                    }
 23.1731 +                if (aline[i-1] == CHAR_SPACE && (aline[i+1] == CHAR_SPACE || aline[i+1] == 0)) { /* if there are spaces on both sides, or space before and end of line */
 23.1732 +                    if (aline[i] == '.') {
 23.1733 +                        if (i > 2)
 23.1734 +                            if (aline[i-2] == '.') isellipsis = 1;
 23.1735 +                        if (i + 2 < llen)
 23.1736 +                            if (aline[i+2] == '.') isellipsis = 1;
 23.1737 +                        }
 23.1738 +                    if (!isemptyline && !isellipsis) {
 23.1739 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1740 +                        if (!pswit[OVERVIEW_SWITCH])
 23.1741 +                            printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
 23.1742 +                        else
 23.1743 +                            cnt_punct++;
 23.1744 +                        }
 23.1745 +                    }
 23.1746 +                }
 23.1747 +            }
 23.1748 +
 23.1749 +        /* 0.98 -- split out the characters that CANNOT be preceded by space */
 23.1750 +        llen = strlen(aline);
 23.1751 +        for (i = 1; i < llen; i++) {                             /* for each character in the line after the first */
 23.1752 +            if  (strchr("?!,;:", aline[i])) {                    /* if it's punctuation that _cannot_ have a space before it */
 23.1753 +                if (aline[i-1] == CHAR_SPACE && !isemptyline && aline[i+1] != CHAR_SPACE) { /* if aline[i+1) DOES == space, it was already reported just above */
 23.1754 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1755 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1756 +                        printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
 23.1757 +                    else
 23.1758 +                        cnt_punct++;
 23.1759 +                    }
 23.1760 +                }
 23.1761 +            }
 23.1762 +
 23.1763 +
 23.1764 +        /* 0.99 -- special case " .X" where X is any alpha. */
 23.1765 +        /* This plugs a hole in the acronym code above. Inelegant, but maintainable. */
 23.1766 +        llen = strlen(aline);
 23.1767 +        for (i = 1; i < llen; i++) {             /* for each character in the line after the first */
 23.1768 +            if  (aline[i] == '.') {              /* if it's a period */
 23.1769 +                if (aline[i-1] == CHAR_SPACE && gcisalpha(aline[i+1])) { /* if the period follows a space and is followed by a letter */
 23.1770 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1771 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1772 +                        printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
 23.1773 +                    else
 23.1774 +                        cnt_punct++;
 23.1775 +                    }
 23.1776 +                }
 23.1777 +            }
 23.1778 +
 23.1779 +
 23.1780 +
 23.1781 +
 23.1782 +        /* v.21 breaking out the search for unspaced doublequotes        */
 23.1783 +        /* This is not as efficient, but it's more maintainable          */
 23.1784 +        /* V.97 added underscore to the list of characters not to query, */
 23.1785 +        /* since underscores are commonly used as italics indicators.    */
 23.1786 +        /* V.98 Added slash as well, same reason.                        */
 23.1787 +        for (i = 1; i < llen; i++) {                               /* for each character in the line after the first */
 23.1788 +            if (aline[i] == CHAR_DQUOTE) {
 23.1789 +                if ((!strchr(" _-.'`,;:!/([{?}])",  aline[i-1]) &&
 23.1790 +                     !strchr(" _-.'`,;:!/([{?}])",  aline[i+1]) &&
 23.1791 +                     aline[i+1] != 0
 23.1792 +                     || (!strchr(" _-([{'`", aline[i-1]) && gcisalpha(aline[i+1])))) {
 23.1793 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1794 +                        if (!pswit[OVERVIEW_SWITCH])
 23.1795 +                            printf("    Line %ld column %d - Unspaced quotes?\n", linecnt, i+1);
 23.1796 +                        else
 23.1797 +                            cnt_punct++;
 23.1798 +                        }
 23.1799 +                }
 23.1800 +            }
 23.1801 +
 23.1802 +
 23.1803 +        /* v.98 check parity of quotes                             */
 23.1804 +        /* v.99 added !*(s+1) in some tests to catch "I am," he said, but I will not be soon". */
 23.1805 +        for (s = aline; *s; s++) {
 23.1806 +            if (*s == CHAR_DQUOTE) {
 23.1807 +                if (!(dquotepar = !dquotepar)) {    /* parity even */
 23.1808 +                    if (!strchr("_-.'`/,;:!?)]} ",  *(s+1))) {
 23.1809 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1810 +                        if (!pswit[OVERVIEW_SWITCH])
 23.1811 +                            printf("    Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
 23.1812 +                        else
 23.1813 +                            cnt_punct++;
 23.1814 +                        }
 23.1815 +                    }
 23.1816 +                else {                              /* parity odd */
 23.1817 +                    if (!gcisalpha(*(s+1)) && !isdigit(*(s+1)) && !strchr("_-/.'`([{$",  *(s+1)) || !*(s+1)) {
 23.1818 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1819 +                        if (!pswit[OVERVIEW_SWITCH])
 23.1820 +                            printf("    Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
 23.1821 +                        else
 23.1822 +                            cnt_punct++;
 23.1823 +                        }
 23.1824 +                    }
 23.1825 +                }
 23.1826 +            }
 23.1827 +
 23.1828 +            if (*aline == CHAR_DQUOTE) {
 23.1829 +                if (strchr(",;:!?)]} ", aline[1])) {
 23.1830 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1831 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1832 +                        printf("    Line %ld column 1 - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
 23.1833 +                    else
 23.1834 +                        cnt_punct++;
 23.1835 +                    }
 23.1836 +                }
 23.1837 +
 23.1838 +        if (pswit[SQUOTE_SWITCH])
 23.1839 +            for (s = aline; *s; s++) {
 23.1840 +                if ((*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
 23.1841 +                     && ( s == aline || (s > aline && !gcisalpha(*(s-1))) || !gcisalpha(*(s+1)))) {
 23.1842 +                    if (!(squotepar = !squotepar)) {    /* parity even */
 23.1843 +                        if (!strchr("_-.'`/\",;:!?)]} ",  *(s+1))) {
 23.1844 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1845 +                            if (!pswit[OVERVIEW_SWITCH])
 23.1846 +                                printf("    Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
 23.1847 +                            else
 23.1848 +                                cnt_punct++;
 23.1849 +                            }
 23.1850 +                        }
 23.1851 +                    else {                              /* parity odd */
 23.1852 +                        if (!gcisalpha(*(s+1)) && !isdigit(*(s+1)) && !strchr("_-/\".'`",  *(s+1)) || !*(s+1)) {
 23.1853 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1854 +                            if (!pswit[OVERVIEW_SWITCH])
 23.1855 +                                printf("    Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
 23.1856 +                            else
 23.1857 +                                cnt_punct++;
 23.1858 +                            }
 23.1859 +                        }
 23.1860 +                    }
 23.1861 +                }
 23.1862 +                    
 23.1863 +
 23.1864 +        /* v.20 also look for double punctuation like ,. or ,,     */
 23.1865 +        /* Thanks to DW for the suggestion!                        */
 23.1866 +        /* I'm putting this in a separate loop for clarity         */
 23.1867 +        /* In books with references, ".," and ".;" are common      */
 23.1868 +        /* e.g. "etc., etc.," and vol. 1.; vol 3.;                 */
 23.1869 +        /* OTOH, from my initial tests, there are also fairly      */
 23.1870 +        /* common errors. What to do? Make these cases paranoid?   */
 23.1871 +        /* V.21 ".," is the most common, so invented warn_dotcomma */
 23.1872 +        /* to suppress detailed reporting if it occurs often       */
 23.1873 +        llen = strlen(aline);
 23.1874 +        for (i = 0; i < llen; i++)                  /* for each character in the line */
 23.1875 +            if (strchr(".?!,;:", aline[i])          /* if it's punctuation */
 23.1876 +            && (strchr(".?!,;:", aline[i+1]))
 23.1877 +            && aline[i] && aline[i+1])      /* followed by punctuation, it's a query, unless . . . */
 23.1878 +                if (
 23.1879 +                  (aline[i] == aline[i+1]
 23.1880 +                  && (aline[i] == '.' || aline[i] == '?' || aline[i] == '!'))
 23.1881 +                  || (!warn_dotcomma && aline[i] == '.' && aline[i+1] == ',')
 23.1882 +                  || (isFrench && !strncmp(aline+i, ",...", 4))
 23.1883 +                  || (isFrench && !strncmp(aline+i, "...,", 4))
 23.1884 +                  || (isFrench && !strncmp(aline+i, ";...", 4))
 23.1885 +                  || (isFrench && !strncmp(aline+i, "...;", 4))
 23.1886 +                  || (isFrench && !strncmp(aline+i, ":...", 4))
 23.1887 +                  || (isFrench && !strncmp(aline+i, "...:", 4))
 23.1888 +                  || (isFrench && !strncmp(aline+i, "!...", 4))
 23.1889 +                  || (isFrench && !strncmp(aline+i, "...!", 4))
 23.1890 +                  || (isFrench && !strncmp(aline+i, "?...", 4))
 23.1891 +                  || (isFrench && !strncmp(aline+i, "...?", 4))
 23.1892 +                ) {
 23.1893 +                if ((isFrench && !strncmp(aline+i, ",...", 4))    /* could this BE any more awkward? */
 23.1894 +                  || (isFrench && !strncmp(aline+i, "...,", 4))
 23.1895 +                  || (isFrench && !strncmp(aline+i, ";...", 4))
 23.1896 +                  || (isFrench && !strncmp(aline+i, "...;", 4))
 23.1897 +                  || (isFrench && !strncmp(aline+i, ":...", 4))
 23.1898 +                  || (isFrench && !strncmp(aline+i, "...:", 4))
 23.1899 +                  || (isFrench && !strncmp(aline+i, "!...", 4))
 23.1900 +                  || (isFrench && !strncmp(aline+i, "...!", 4))
 23.1901 +                  || (isFrench && !strncmp(aline+i, "?...", 4))
 23.1902 +                  || (isFrench && !strncmp(aline+i, "...?", 4)))
 23.1903 +                    i +=4;
 23.1904 +                        ; /* do nothing for .. !! and ?? which can be legit */
 23.1905 +                    }
 23.1906 +                else {
 23.1907 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1908 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1909 +                        printf("    Line %ld column %d - Double punctuation?\n", linecnt, i+1);
 23.1910 +                    else
 23.1911 +                        cnt_punct++;
 23.1912 +                    }
 23.1913 +
 23.1914 +        /* v.21 breaking out the search for spaced doublequotes */
 23.1915 +        /* This is not as efficient, but it's more maintainable */
 23.1916 +        s = aline;
 23.1917 +        while (strstr(s," \" ")) {
 23.1918 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1919 +            if (!pswit[OVERVIEW_SWITCH])
 23.1920 +                printf("    Line %ld column %d - Spaced doublequote?\n", linecnt, (int)(strstr(s," \" ")-aline+1));
 23.1921 +            else
 23.1922 +                cnt_punct++;
 23.1923 +            s = strstr(s," \" ") + 2;
 23.1924 +            }
 23.1925 +
 23.1926 +        /* v.20 also look for spaced singlequotes ' and `  */
 23.1927 +        s = aline;
 23.1928 +        while (strstr(s," ' ")) {
 23.1929 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1930 +            if (!pswit[OVERVIEW_SWITCH])
 23.1931 +                printf("    Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ' ")-aline+1));
 23.1932 +            else
 23.1933 +                cnt_punct++;
 23.1934 +            s = strstr(s," ' ") + 2;
 23.1935 +            }
 23.1936 +
 23.1937 +        s = aline;
 23.1938 +        while (strstr(s," ` ")) {
 23.1939 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1940 +            if (!pswit[OVERVIEW_SWITCH])
 23.1941 +                printf("    Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ` ")-aline+1));
 23.1942 +            else
 23.1943 +                cnt_punct++;
 23.1944 +            s = strstr(s," ` ") + 2;
 23.1945 +            }
 23.1946 +
 23.1947 +        /* v.99 check special case of 'S instead of 's at end of word */
 23.1948 +        s = aline + 1;
 23.1949 +        while (*s) {
 23.1950 +            if (*s == CHAR_SQUOTE && *(s+1) == 'S' && *(s-1)>='a' && *(s-1)<='z')  {
 23.1951 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1952 +                if (!pswit[OVERVIEW_SWITCH])
 23.1953 +                    printf("    Line %ld column %d - Capital \"S\"?\n", linecnt, (int)(s-aline+2));
 23.1954 +                else
 23.1955 +                    cnt_punct++;
 23.1956 +                }
 23.1957 +            s++;
 23.1958 +            }
 23.1959 +
 23.1960 +
 23.1961 +        /* v.21 Now check special cases - start and end of line - */
 23.1962 +        /* for single and double quotes. Start is sometimes [sic] */
 23.1963 +        /* but better to query it anyway.                         */
 23.1964 +        /* While I'm here, check for dash at end of line          */
 23.1965 +        llen = strlen(aline);
 23.1966 +        if (llen > 1) {
 23.1967 +            if (aline[llen-1] == CHAR_DQUOTE ||
 23.1968 +                aline[llen-1] == CHAR_SQUOTE ||
 23.1969 +                aline[llen-1] == CHAR_OPEN_SQUOTE)
 23.1970 +                if (aline[llen-2] == CHAR_SPACE) {
 23.1971 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1972 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1973 +                        printf("    Line %ld column %d - Spaced quote?\n", linecnt, llen);
 23.1974 +                    else
 23.1975 +                        cnt_punct++;
 23.1976 +                    }
 23.1977 +            
 23.1978 +            /* V 0.98 removed aline[0] == CHAR_DQUOTE from the test below, since */
 23.1979 +            /* Wrongspaced quotes test also catches it for "                     */
 23.1980 +            if (aline[0] == CHAR_SQUOTE ||
 23.1981 +                aline[0] == CHAR_OPEN_SQUOTE)
 23.1982 +                if (aline[1] == CHAR_SPACE) {
 23.1983 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1984 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1985 +                        printf("    Line %ld column 1 - Spaced quote?\n", linecnt);
 23.1986 +                    else
 23.1987 +                        cnt_punct++;
 23.1988 +                    }
 23.1989 +            /* dash at end of line may well be legit - paranoid mode only */
 23.1990 +            /* and don't report em-dash at line-end                       */
 23.1991 +            if (pswit[PARANOID_SWITCH] && warn_hyphen) {
 23.1992 +                for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
 23.1993 +                if (aline[i] == '-' && aline[i-1] != '-') {
 23.1994 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.1995 +                    if (!pswit[OVERVIEW_SWITCH])
 23.1996 +                        printf("    Line %ld column %d - Hyphen at end of line?\n", linecnt, i);
 23.1997 +                    }
 23.1998 +                }
 23.1999 +            }
 23.2000 +
 23.2001 +        /* v.21 also look for brackets surrounded by alpha                    */
 23.2002 +        /* Brackets are often unspaced, but shouldn't be surrounded by alpha. */
 23.2003 +        /* If so, suspect a scanno like "a]most"                              */
 23.2004 +        llen = strlen(aline);
 23.2005 +        for (i = 1; i < llen-1; i++) {           /* for each character in the line except 1st & last*/
 23.2006 +            if (strchr("{[()]}", aline[i])         /* if it's a bracket */
 23.2007 +                && gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) {
 23.2008 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.2009 +                if (!pswit[OVERVIEW_SWITCH])
 23.2010 +                    printf("    Line %ld column %d - Unspaced bracket?\n", linecnt, i);
 23.2011 +                else
 23.2012 +                    cnt_punct++;
 23.2013 +                }
 23.2014 +            }
 23.2015 +        /* The "Cinderella" case, back in again! :-S Give it another shot */
 23.2016 +        if (warn_endquote) {
 23.2017 +            llen = strlen(aline);
 23.2018 +            for (i = 1; i < llen; i++) {           /* for each character in the line except 1st */
 23.2019 +                if (aline[i] == CHAR_DQUOTE)
 23.2020 +                    if (isalpha(aline[i-1])) {
 23.2021 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.2022 +                        if (!pswit[OVERVIEW_SWITCH])
 23.2023 +                            printf("    Line %ld column %d - endquote missing punctuation?\n", linecnt, i);
 23.2024 +                        else
 23.2025 +                            cnt_punct++;
 23.2026 +                        }
 23.2027 +                }
 23.2028 +            }
 23.2029 +
 23.2030 +        llen = strlen(aline);
 23.2031 +
 23.2032 +        /* Check for <HTML TAG> */
 23.2033 +        /* If there is a < in the line, followed at some point  */
 23.2034 +        /* by a > then we suspect HTML                          */
 23.2035 +        if (strstr(aline, "<") && strstr(aline, ">")) {
 23.2036 +            i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
 23.2037 +            if (i > 0) {
 23.2038 +                strncpy(wrk, strstr(aline, "<"), i);
 23.2039 +                wrk[i] = 0;
 23.2040 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.2041 +                if (!pswit[OVERVIEW_SWITCH])
 23.2042 +                    printf("    Line %ld column %d - HTML Tag? %s \n", linecnt, (int)(strstr(aline, "<") - aline) + 1, wrk);
 23.2043 +                else
 23.2044 +                    cnt_html++;
 23.2045 +                }
 23.2046 +            }
 23.2047 +
 23.2048 +        /* Check for &symbol; HTML                   */
 23.2049 +        /* If there is a & in the line, followed at  */
 23.2050 +        /* some point by a ; then we suspect HTML    */
 23.2051 +        if (strstr(aline, "&") && strstr(aline, ";")) {
 23.2052 +            i = (int)(strstr(aline, ";") - strstr(aline, "&") + 1);
 23.2053 +            for (s = strstr(aline, "&"); s < strstr(aline, ";"); s++)   
 23.2054 +                if (*s == CHAR_SPACE) i = 0;                /* 0.99 don't report "Jones & Son;" */
 23.2055 +            if (i > 0) {
 23.2056 +                strncpy(wrk, strstr(aline,"&"), i);
 23.2057 +                wrk[i] = 0;
 23.2058 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 23.2059 +                if (!pswit[OVERVIEW_SWITCH])
 23.2060 +                    printf("    Line %ld column %d - HTML symbol? %s \n", linecnt, (int)(strstr(aline, "&") - aline) + 1, wrk);
 23.2061 +                else
 23.2062 +                    cnt_html++;
 23.2063 +                }
 23.2064 +            }
 23.2065 +
 23.2066 +        /* At end of paragraph, check for mismatched quotes.           */
 23.2067 +        /* We don't want to report an error immediately, since it is a */
 23.2068 +        /* common convention to omit the quotes at end of paragraph if */
 23.2069 +        /* the next paragraph is a continuation of the same speaker.   */
 23.2070 +        /* Where this is the case, the next para should begin with a   */
 23.2071 +        /* quote, so we store the warning message and only display it  */
 23.2072 +        /* at the top of the next iteration if the new para doesn't    */
 23.2073 +        /* start with a quote.                                         */
 23.2074 +        /* The -p switch overrides this default, and warns of unclosed */
 23.2075 +        /* quotes on _every_ paragraph, whether the next begins with a */
 23.2076 +        /* quote or not.                                               */
 23.2077 +        /* Version .16 - only report mismatched single quotes if       */
 23.2078 +        /* an open_single_quotes was found.                            */
 23.2079 +
 23.2080 +        if (isemptyline) {          /* end of para - add up the totals */
 23.2081 +            if (quot % 2)
 23.2082 +                sprintf(dquote_err, "    Line %ld - Mismatched quotes\n", linecnt);
 23.2083 +            if (pswit[SQUOTE_SWITCH] && open_single_quote && (open_single_quote != close_single_quote) )
 23.2084 +                sprintf(squote_err,"    Line %ld - Mismatched singlequotes?\n", linecnt);
 23.2085 +            if (pswit[SQUOTE_SWITCH] && open_single_quote
 23.2086 +                                     && (open_single_quote != close_single_quote)
 23.2087 +                                     && (open_single_quote != close_single_quote +1) )
 23.2088 +                squot = 1;    /* flag it to be noted regardless of the first char of the next para */
 23.2089 +            if (r_brack)
 23.2090 +                sprintf(rbrack_err, "    Line %ld - Mismatched round brackets?\n", linecnt);
 23.2091 +            if (s_brack)
 23.2092 +                sprintf(sbrack_err, "    Line %ld - Mismatched square brackets?\n", linecnt);
 23.2093 +            if (c_brack)
 23.2094 +                sprintf(cbrack_err, "    Line %ld - Mismatched curly brackets?\n", linecnt);
 23.2095 +            if (c_unders % 2)
 23.2096 +                sprintf(unders_err, "    Line %ld - Mismatched underscores?\n", linecnt);
 23.2097 +            quot = s_brack = c_brack = r_brack = c_unders =
 23.2098 +                open_single_quote = close_single_quote = 0;
 23.2099 +            isnewpara = 1;     /* let the next iteration know that it's starting a new para */
 23.2100 +            }
 23.2101 +
 23.2102 +        /* V.21 _ALSO_ at end of paragraph, check for omitted punctuation. */
 23.2103 +        /*      by working back through prevline. DW.                      */
 23.2104 +        /* Hmmm. Need to check this only for "normal" paras.               */
 23.2105 +        /* So what is a "normal" para? ouch!                               */
 23.2106 +        /* Not normal if one-liner (chapter headings, etc.)                */
 23.2107 +        /* Not normal if doesn't contain at least one locase letter        */
 23.2108 +        /* Not normal if starts with space                                 */
 23.2109 +
 23.2110 +        /* 0.99 tighten up on para end checks. Disallow comma and */
 23.2111 +        /* semi-colon. Check for legit para end before quotes.    */
 23.2112 +        if (isemptyline) {          /* end of para */
 23.2113 +            for (s = prevline, i = 0; *s && !i; s++)
 23.2114 +                if (gcisletter(*s))
 23.2115 +                    i = 1;    /* use i to indicate the presence of a letter on the line */
 23.2116 +            /* This next "if" is a problem.                                             */
 23.2117 +            /* If I say "start_para_line <= linecnt - 1", that includes one-line        */
 23.2118 +            /* "paragraphs" like chapter heads. Lotsa false positives.                  */
 23.2119 +            /* If I say "start_para_line < linecnt - 1" it doesn't, but then it         */
 23.2120 +            /* misses genuine one-line paragraphs.                                      */
 23.2121 +            /* So what do I do? */
 23.2122 +            if (i
 23.2123 +                && lastblen > 2
 23.2124 +                && start_para_line < linecnt - 1
 23.2125 +                && *prevline > CHAR_SPACE
 23.2126 +                ) {
 23.2127 +                for (i = strlen(prevline)-1; (prevline[i] == CHAR_DQUOTE || prevline[i] == CHAR_SQUOTE) && prevline[i] > CHAR_SPACE && i > 0; i--);
 23.2128 +                for (  ; i > 0; i--) {
 23.2129 +                    if (gcisalpha(prevline[i])) {
 23.2130 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
 23.2131 +                        if (!pswit[OVERVIEW_SWITCH])
 23.2132 +                            printf("    Line %ld column %d - No punctuation at para end?\n", linecnt-1, strlen(prevline));
 23.2133 +                        else
 23.2134 +                            cnt_punct++;
 23.2135 +                        break;
 23.2136 +                        }
 23.2137 +                    if (strchr("-.:!([{?}])", prevline[i]))
 23.2138 +                        break;
 23.2139 +                    }
 23.2140 +                }
 23.2141 +            }
 23.2142 +        strcpy(prevline, aline);
 23.2143 +    }
 23.2144 +    fclose (infile);
 23.2145 +    if (!pswit[OVERVIEW_SWITCH])
 23.2146 +        for (i = 0; i < MAX_QWORD; i++)
 23.2147 +            if (dupcnt[i])
 23.2148 +                printf("\nNote: Queried word %s was duplicated %d time%s\n", qword[i], dupcnt[i], "s");
 23.2149 +}
 23.2150 +
 23.2151 +
 23.2152 +
 23.2153 +/* flgets - get one line from the input stream, checking for   */
 23.2154 +/* the existence of exactly one CR/LF line-end per line.       */
 23.2155 +/* Returns a pointer to the line.                              */
 23.2156 +
 23.2157 +char *flgets(char *theline, int maxlen, FILE *thefile, long lcnt)
 23.2158 +{
 23.2159 +    char c;
 23.2160 +    int len, isCR, cint;
 23.2161 +
 23.2162 +    *theline = 0;
 23.2163 +    len = isCR = 0;
 23.2164 +    c = cint = fgetc(thefile);
 23.2165 +    do {
 23.2166 +        if (cint == EOF)
 23.2167 +            return (NULL);
 23.2168 +        if (c == 10)  /* either way, it's end of line */
 23.2169 +            if (isCR)
 23.2170 +                break;
 23.2171 +            else {   /* Error - a LF without a preceding CR */
 23.2172 +                if (pswit[LINE_END_SWITCH]) {
 23.2173 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
 23.2174 +                    if (!pswit[OVERVIEW_SWITCH])
 23.2175 +                        printf("    Line %ld - No CR?\n", lcnt);
 23.2176 +                    else
 23.2177 +                        cnt_lineend++;
 23.2178 +                    }
 23.2179 +                break;
 23.2180 +                }
 23.2181 +        if (c == 13) {
 23.2182 +            if (isCR) { /* Error - two successive CRs */
 23.2183 +                if (pswit[LINE_END_SWITCH]) {
 23.2184 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
 23.2185 +                    if (!pswit[OVERVIEW_SWITCH])
 23.2186 +                        printf("    Line %ld - Two successive CRs?\n", lcnt);
 23.2187 +                    else
 23.2188 +                        cnt_lineend++;
 23.2189 +                    }
 23.2190 +                }
 23.2191 +            isCR = 1;
 23.2192 +            }
 23.2193 +        else {
 23.2194 +            if (pswit[LINE_END_SWITCH] && isCR) {
 23.2195 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
 23.2196 +                if (!pswit[OVERVIEW_SWITCH])
 23.2197 +                    printf("    Line %ld column %d - CR without LF?\n", lcnt, len+1);
 23.2198 +                else
 23.2199 +                    cnt_lineend++;
 23.2200 +                }
 23.2201 +             theline[len] = c;
 23.2202 +             len++;
 23.2203 +             theline[len] = 0;
 23.2204 +             isCR = 0;
 23.2205 +             }
 23.2206 +        c = cint = fgetc(thefile);
 23.2207 +    } while(len < maxlen);
 23.2208 +    if (pswit[MARKUP_SWITCH])  
 23.2209 +        postprocess_for_HTML(theline);
 23.2210 +    if (pswit[DP_SWITCH])  
 23.2211 +        postprocess_for_DP(theline);
 23.2212 +    return(theline);
 23.2213 +}
 23.2214 +
 23.2215 +
 23.2216 +
 23.2217 +
 23.2218 +/* mixdigit - takes a "word" as a parameter, and checks whether it   */
 23.2219 +/* contains a mixture of alpha and digits. Generally, this is an     */
 23.2220 +/* error, but may not be for cases like 4th or L5 12s. 3d.           */
 23.2221 +/* Returns 0 if no error found, 1 if error.                          */
 23.2222 +
 23.2223 +int mixdigit(char *checkword)   /* check for digits like 1 or 0 in words */
 23.2224 +{
 23.2225 +    int wehaveadigit, wehavealetter, firstdigits, query, wl;
 23.2226 +    char *s;
 23.2227 +
 23.2228 +
 23.2229 +    wehaveadigit = wehavealetter = query = 0;
 23.2230 +    for (s = checkword; *s; s++)
 23.2231 +        if (gcisalpha(*s))
 23.2232 +            wehavealetter = 1;
 23.2233 +        else
 23.2234 +            if (gcisdigit(*s))
 23.2235 +                wehaveadigit = 1;
 23.2236 +    if (wehaveadigit && wehavealetter) {         /* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
 23.2237 +        query = 1;
 23.2238 +        wl = strlen(checkword);
 23.2239 +        for (firstdigits = 0; gcisdigit(checkword[firstdigits]); firstdigits++)
 23.2240 +            ;
 23.2241 +        /* digits, ending in st, rd, nd, th of either case */
 23.2242 +        /* 0.99 donovan points out an error below. Turns out */
 23.2243 +        /*      I was using matchword like strcmp when the   */
 23.2244 +        /*      return values are different! Duh.            */
 23.2245 +        if (firstdigits + 2 == wl &&
 23.2246 +              (matchword(checkword + wl - 2, "st")
 23.2247 +            || matchword(checkword + wl - 2, "rd")
 23.2248 +            || matchword(checkword + wl - 2, "nd")
 23.2249 +            || matchword(checkword + wl - 2, "th"))
 23.2250 +            )
 23.2251 +                query = 0;
 23.2252 +        if (firstdigits + 3 == wl &&
 23.2253 +              (matchword(checkword + wl - 3, "sts")
 23.2254 +            || matchword(checkword + wl - 3, "rds")
 23.2255 +            || matchword(checkword + wl - 3, "nds")
 23.2256 +            || matchword(checkword + wl - 3, "ths"))
 23.2257 +            )
 23.2258 +                query = 0;
 23.2259 +        if (firstdigits + 3 == wl &&
 23.2260 +              (matchword(checkword + wl - 4, "stly")
 23.2261 +            || matchword(checkword + wl - 4, "rdly")
 23.2262 +            || matchword(checkword + wl - 4, "ndly")
 23.2263 +            || matchword(checkword + wl - 4, "thly"))
 23.2264 +            )
 23.2265 +                query = 0;
 23.2266 +
 23.2267 +        /* digits, ending in l, L, s or d */
 23.2268 +        if (firstdigits + 1 == wl &&
 23.2269 +            (checkword[wl-1] == 'l'
 23.2270 +            || checkword[wl-1] == 'L'
 23.2271 +            || checkword[wl-1] == 's'
 23.2272 +            || checkword[wl-1] == 'd'))
 23.2273 +                query = 0;
 23.2274 +        /* L at the start of a number, representing Britsh pounds, like L500  */
 23.2275 +        /* This is cute. We know the current word is mixeddigit. If the first */
 23.2276 +        /* letter is L, there must be at least one digit following. If both   */
 23.2277 +        /* digits and letters follow, we have a genuine error, else we have a */
 23.2278 +        /* capital L followed by digits, and we accept that as a non-error.   */
 23.2279 +        if (checkword[0] == 'L')
 23.2280 +            if (!mixdigit(checkword+1))
 23.2281 +                query = 0;
 23.2282 +        }
 23.2283 +    return (query);
 23.2284 +}
 23.2285 +
 23.2286 +
 23.2287 +
 23.2288 +
 23.2289 +/* getaword - extracts the first/next "word" from the line, and puts */
 23.2290 +/* it into "thisword". A word is defined as one English word unit    */
 23.2291 +/* -- or at least that's what I'm trying for.                        */
 23.2292 +/* Returns a pointer to the position in the line where we will start */
 23.2293 +/* looking for the next word.                                        */
 23.2294 +
 23.2295 +char *getaword(char *fromline, char *thisword)
 23.2296 +{
 23.2297 +    int i, wordlen;
 23.2298 +    char *s;
 23.2299 +
 23.2300 +    wordlen = 0;
 23.2301 +    for ( ; !gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline ; fromline++ );
 23.2302 +
 23.2303 +    /* V .20                                                                   */
 23.2304 +    /* add a look-ahead to handle exceptions for numbers like 1,000 and 1.35.  */
 23.2305 +    /* Especially yucky is the case of L1,000                                  */
 23.2306 +    /* I hate this, and I see other ways, but I don't see that any is _better_.*/
 23.2307 +    /* This section looks for a pattern of characters including a digit        */
 23.2308 +    /* followed by a comma or period followed by one or more digits.           */
 23.2309 +    /* If found, it returns this whole pattern as a word; otherwise we discard */
 23.2310 +    /* the results and resume our normal programming.                          */
 23.2311 +    s = fromline;
 23.2312 +    for (  ; (gcisdigit(*s) || gcisalpha(*s) || *s == ',' || *s == '.') && wordlen < MAXWORDLEN ; s++ ) {
 23.2313 +        thisword[wordlen] = *s;
 23.2314 +        wordlen++;
 23.2315 +        }
 23.2316 +    thisword[wordlen] = 0;
 23.2317 +    for (i = 1; i < wordlen -1; i++) {
 23.2318 +        if (thisword[i] == '.' || thisword[i] == ',') {
 23.2319 +            if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1])) {   /* we have one of the damned things */
 23.2320 +                fromline = s;
 23.2321 +                return(fromline);
 23.2322 +                }
 23.2323 +            }
 23.2324 +        }
 23.2325 +
 23.2326 +    /* we didn't find a punctuated number - do the regular getword thing */
 23.2327 +    wordlen = 0;
 23.2328 +    for (  ; (gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline == '\'') && wordlen < MAXWORDLEN ; fromline++ ) {
 23.2329 +        thisword[wordlen] = *fromline;
 23.2330 +        wordlen++;
 23.2331 +        }
 23.2332 +    thisword[wordlen] = 0;
 23.2333 +    return(fromline);
 23.2334 +}
 23.2335 +
 23.2336 +
 23.2337 +
 23.2338 +
 23.2339 +
 23.2340 +/* matchword - just a case-insensitive string matcher    */
 23.2341 +/* yes, I know this is not efficient. I'll worry about   */
 23.2342 +/* that when I have a clear idea where I'm going with it.*/
 23.2343 +
 23.2344 +int matchword(char *checkfor, char *thisword)
 23.2345 +{
 23.2346 +    unsigned int ismatch, i;
 23.2347 +
 23.2348 +    if (strlen(checkfor) != strlen(thisword)) return(0);
 23.2349 +
 23.2350 +    ismatch = 1;     /* assume a match until we find a difference */
 23.2351 +    for (i = 0; i <strlen(checkfor); i++)
 23.2352 +        if (toupper(checkfor[i]) != toupper(thisword[i]))
 23.2353 +            ismatch = 0;
 23.2354 +    return (ismatch);
 23.2355 +}
 23.2356 +
 23.2357 +
 23.2358 +
 23.2359 +
 23.2360 +
 23.2361 +/* lowerit - lowercase the line. Yes, strlwr does the same job,  */
 23.2362 +/* but not on all platforms, and I'm a bit paranoid about what   */
 23.2363 +/* some implementations of tolower might do to hi-bit characters,*/
 23.2364 +/* which shouldn't matter, but better safe than sorry.           */
 23.2365 +
 23.2366 +void lowerit(char *theline)
 23.2367 +{
 23.2368 +    for ( ; *theline; theline++)
 23.2369 +        if (*theline >='A' && *theline <='Z')
 23.2370 +            *theline += 32;
 23.2371 +}
 23.2372 +
 23.2373 +
 23.2374 +/* Is this word a Roman Numeral?                                    */
 23.2375 +/* v 0.99 improved to be better. It still doesn't actually          */
 23.2376 +/* validate that the number is a valid Roman Numeral -- for example */
 23.2377 +/* it will pass MXXXXXXXXXX as a valid Roman Numeral, but that's not*/
 23.2378 +/* what we're here to do. If it passes this, it LOOKS like a Roman  */
 23.2379 +/* numeral. Anyway, the actual Romans were pretty tolerant of bad   */
 23.2380 +/* arithmetic, or expressions thereof, except when it came to taxes.*/
 23.2381 +/* Allow any number of M, an optional D, an optional CM or CD,      */
 23.2382 +/* any number of optional Cs, an optional XL or an optional XC, an  */
 23.2383 +/* optional IX or IV, an optional V and any number of optional Is.  */
 23.2384 +/* Good enough for jazz chords.                                     */
 23.2385 +
 23.2386 +int isroman(char *t)
 23.2387 +{
 23.2388 +    char *s;
 23.2389 +
 23.2390 +    if (!t || !*t) return (0);
 23.2391 +
 23.2392 +    s = t;
 23.2393 +
 23.2394 +    while (*t == 'm' && *t ) t++;
 23.2395 +    if (*t == 'd') t++;
 23.2396 +    if (*t == 'c' && *(t+1) == 'm') t+=2;
 23.2397 +    if (*t == 'c' && *(t+1) == 'd') t+=2;
 23.2398 +    while (*t == 'c' && *t) t++;
 23.2399 +    if (*t == 'x' && *(t+1) == 'l') t+=2;
 23.2400 +    if (*t == 'x' && *(t+1) == 'c') t+=2;
 23.2401 +    if (*t == 'l') t++;
 23.2402 +    while (*t == 'x' && *t) t++;
 23.2403 +    if (*t == 'i' && *(t+1) == 'x') t+=2;
 23.2404 +    if (*t == 'i' && *(t+1) == 'v') t+=2;
 23.2405 +    if (*t == 'v') t++;
 23.2406 +    while (*t == 'i' && *t) t++;
 23.2407 +    if (!*t) return (1);
 23.2408 +
 23.2409 +    return(0);
 23.2410 +}
 23.2411 +
 23.2412 +
 23.2413 +
 23.2414 +
 23.2415 +/* gcisalpha is a special version that is somewhat lenient on 8-bit texts.     */
 23.2416 +/* If we use the standard isalpha() function, 8-bit accented characters break  */
 23.2417 +/* words, so that tete with accented characters appears to be two words, "t"   */
 23.2418 +/* and "t", with 8-bit characters between them. This causes over-reporting of  */
 23.2419 +/* errors. gcisalpha() recognizes accented letters from the CP1252 (Windows)   */
 23.2420 +/* and ISO-8859-1 character sets, which are the most common PG 8-bit types.    */
 23.2421 +
 23.2422 +int gcisalpha(unsigned char c)
 23.2423 +{
 23.2424 +    if (c >='a' && c <='z') return(1);
 23.2425 +    if (c >='A' && c <='Z') return(1);
 23.2426 +    if (c < 140) return(0);
 23.2427 +    if (c >=192 && c != 208 && c != 215 && c != 222 && c != 240 && c != 247 && c != 254) return(1);
 23.2428 +    if (c == 140 || c == 142 || c == 156 || c == 158 || c == 159) return (1);
 23.2429 +    return(0);
 23.2430 +}
 23.2431 +
 23.2432 +/* gcisdigit is a special version that doesn't get confused in 8-bit texts.    */
 23.2433 +int gcisdigit(unsigned char c)
 23.2434 +{   
 23.2435 +    if (c >= '0' && c <='9') return(1);
 23.2436 +    return(0);
 23.2437 +}
 23.2438 +
 23.2439 +/* gcisletter is a special version that doesn't get confused in 8-bit texts.    */
 23.2440 +/* Yeah, we're ISO-8891-1-specific. So sue me.                                  */
 23.2441 +int gcisletter(unsigned char c)
 23.2442 +{   
 23.2443 +    if ((c >= 'A' && c <='Z') || (c >= 'a' && c <='z') || c >= 192) return(1);
 23.2444 +    return(0);
 23.2445 +}
 23.2446 +
 23.2447 +
 23.2448 +
 23.2449 +
 23.2450 +/* gcstrchr wraps strchr to return NULL if the character being searched for is zero */
 23.2451 +
 23.2452 +char *gcstrchr(char *s, char c)
 23.2453 +{
 23.2454 +    if (c == 0) return(NULL);
 23.2455 +    return(strchr(s,c));
 23.2456 +}
 23.2457 +
 23.2458 +/* postprocess_for_DP is derived from postprocess_for_HTML          */
 23.2459 +/* It is invoked with the -d switch from flgets().                  */
 23.2460 +/* It simply "removes" from the line a hard-coded set of common     */
 23.2461 +/* DP-specific tags, so that the line passed to the main routine has*/
 23.2462 +/* been pre-cleaned of DP markup.                                   */
 23.2463 +
 23.2464 +void postprocess_for_DP(char *theline)
 23.2465 +{
 23.2466 +
 23.2467 +    char *s, *t;
 23.2468 +    int i;
 23.2469 +
 23.2470 +    if (!*theline) 
 23.2471 +        return;
 23.2472 +
 23.2473 +    for (i = 0; *DPmarkup[i]; i++) {
 23.2474 +        s = strstr(theline, DPmarkup[i]);
 23.2475 +        while (s) {
 23.2476 +            t = s + strlen(DPmarkup[i]);
 23.2477 +            while (*t) {
 23.2478 +                *s = *t;
 23.2479 +                t++; s++;
 23.2480 +                }
 23.2481 +            *s = 0;
 23.2482 +            s = strstr(theline, DPmarkup[i]);
 23.2483 +            }
 23.2484 +        }
 23.2485 +
 23.2486 +}
 23.2487 +
 23.2488 +
 23.2489 +/* postprocess_for_HTML is, at the moment (0.97), a very nasty      */
 23.2490 +/* short-term fix for Charlz. Nasty, nasty, nasty.                  */
 23.2491 +/* It is invoked with the -m switch from flgets().                  */
 23.2492 +/* It simply "removes" from the line a hard-coded set of common     */
 23.2493 +/* HTML tags and "replaces" a hard-coded set of common HTML         */
 23.2494 +/* entities, so that the line passed to the main routine has        */
 23.2495 +/* been pre-cleaned of HTML. This is _so_ not the right way to      */
 23.2496 +/* deal with HTML, but what Charlz needs now is not HTML handling   */
 23.2497 +/* proper: just ignoring <i> tags and some others.                  */
 23.2498 +/* To be revisited in future releases!                              */
 23.2499 +
 23.2500 +void postprocess_for_HTML(char *theline)
 23.2501 +{
 23.2502 +
 23.2503 +    if (strstr(theline, "<") && strstr(theline, ">"))
 23.2504 +        while (losemarkup(theline))
 23.2505 +            ;
 23.2506 +    while (loseentities(theline))
 23.2507 +        ;
 23.2508 +}
 23.2509 +
 23.2510 +char *losemarkup(char *theline)
 23.2511 +{
 23.2512 +    char *s, *t;
 23.2513 +    int i;
 23.2514 +
 23.2515 +    if (!*theline) 
 23.2516 +        return(NULL);
 23.2517 +
 23.2518 +    s = strstr(theline, "<");
 23.2519 +    t = strstr(theline, ">");
 23.2520 +    if (!s || !t) return(NULL);
 23.2521 +    for (i = 0; *markup[i]; i++)
 23.2522 +        if (!tagcomp(s+1, markup[i])) {
 23.2523 +            if (!*(t+1)) {
 23.2524 +                *s = 0;
 23.2525 +                return(s);
 23.2526 +                }
 23.2527 +            else
 23.2528 +                if (t > s) {
 23.2529 +                    strcpy(s, t+1);
 23.2530 +                    return(s);
 23.2531 +                    }
 23.2532 +        }
 23.2533 +    /* it's an unrecognized <xxx> */
 23.2534 +    return(NULL);
 23.2535 +}
 23.2536 +
 23.2537 +char *loseentities(char *theline)
 23.2538 +{
 23.2539 +    int i;
 23.2540 +    char *s, *t;
 23.2541 +
 23.2542 +    if (!*theline) 
 23.2543 +        return(NULL);
 23.2544 +
 23.2545 +    for (i = 0; *entities[i].htmlent; i++) {
 23.2546 +        s = strstr(theline, entities[i].htmlent);
 23.2547 +        if (s) {
 23.2548 +            t = malloc((size_t)strlen(s));
 23.2549 +            if (!t) return(NULL);
 23.2550 +            strcpy(t, s + strlen(entities[i].htmlent));
 23.2551 +            strcpy(s, entities[i].textent);
 23.2552 +            strcat(s, t);
 23.2553 +            free(t);
 23.2554 +            return(theline);
 23.2555 +            }
 23.2556 +        }
 23.2557 +
 23.2558 +    /* V0.97 Duh. Forgot to check the htmlnum member */
 23.2559 +    for (i = 0; *entities[i].htmlnum; i++) {
 23.2560 +        s = strstr(theline, entities[i].htmlnum);
 23.2561 +        if (s) {
 23.2562 +            t = malloc((size_t)strlen(s));
 23.2563 +            if (!t) return(NULL);
 23.2564 +            strcpy(t, s + strlen(entities[i].htmlnum));
 23.2565 +            strcpy(s, entities[i].textent);
 23.2566 +            strcat(s, t);
 23.2567 +            free(t);
 23.2568 +            return(theline);
 23.2569 +            }
 23.2570 +        }
 23.2571 +    return(NULL);
 23.2572 +}
 23.2573 +
 23.2574 +
 23.2575 +int tagcomp(char *strin, char *basetag)
 23.2576 +{
 23.2577 +    char *s, *t;
 23.2578 +
 23.2579 +    s = basetag;
 23.2580 +    t  = strin;
 23.2581 +    if (*t == '/') t++; /* ignore a slash */
 23.2582 +    while (*s && *t) {
 23.2583 +        if (tolower(*s) != tolower(*t)) return(1);
 23.2584 +        s++; t++;
 23.2585 +        }
 23.2586 +    /* OK, we have < followed by a valid tag start  */
 23.2587 +    /* should I do something about length?          */
 23.2588 +    /* this is messy. The length of an <i> tag is   */
 23.2589 +    /* limited, but a <table> could go on for miles */
 23.2590 +    /* so I'd have to parse the tags . . . ugh.     */
 23.2591 +    /* It isn't what Charlz needs now, so mark it   */
 23.2592 +    /* as 'pending'.                                */
 23.2593 +    return(0);
 23.2594 +}
 23.2595 +
 23.2596 +void proghelp()                  /* explain program usage here */
 23.2597 +{
 23.2598 +    fputs("V. 0.991. Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
 23.2599 +    fputs("Gutcheck comes wih ABSOLUTELY NO WARRANTY. For details, read the file COPYING.\n", stderr);
 23.2600 +    fputs("This is Free Software; you may redistribute it under certain conditions (GPL);\n", stderr);
 23.2601 +    fputs("read the file COPYING for details.\n\n", stderr);
 23.2602 +    fputs("Usage is: gutcheck [-setpxloyhud] filename\n",stderr);
 23.2603 +    fputs("  where -s checks single quotes, -e suppresses echoing lines, -t checks typos\n",stderr);
 23.2604 +    fputs("  -x (paranoid) switches OFF -t and extra checks, -l turns OFF line-end checks\n",stderr);
 23.2605 +    fputs("  -o just displays overview without detail, -h echoes header fields\n",stderr);
 23.2606 +    fputs("  -v (verbose) unsuppresses duplicate reporting, -m suppresses markup\n",stderr);
 23.2607 +    fputs("  -d ignores DP-specific markup,\n",stderr);
 23.2608 +    fputs("  -u uses a file gutcheck.typ to query user-defined possible typos\n",stderr);
 23.2609 +    fputs("Sample usage: gutcheck warpeace.txt \n",stderr);
 23.2610 +    fputs("\n",stderr);
 23.2611 +    fputs("Gutcheck looks for errors in Project Gutenberg(TM) etexts.\n", stderr);
 23.2612 +    fputs("Gutcheck queries anything it thinks shouldn't be in a PG text; non-ASCII\n",stderr);
 23.2613 +    fputs("characters like accented letters, lines longer than 75 or shorter than 55,\n",stderr);
 23.2614 +    fputs("unbalanced quotes or brackets, a variety of badly formatted punctuation, \n",stderr);
 23.2615 +    fputs("HTML tags, some likely typos. It is NOT a substitute for human judgement.\n",stderr);
 23.2616 +    fputs("\n",stderr);
 23.2617 +}
 23.2618 +
 23.2619 +
 23.2620 +
 23.2621 +/*********************************************************************
 23.2622 +  Revision History:
 23.2623 +
 23.2624 +  04/22/01 Cleaned up some stuff and released .10
 23.2625 +
 23.2626 +           ---------------
 23.2627 +
 23.2628 +  05/09/01 Added the typo list, added two extra cases of he/be error,
 23.2629 +           added -p switch, OPEN_SINGLE QUOTE char as .11
 23.2630 +
 23.2631 +           ---------------
 23.2632 +
 23.2633 +  05/20/01 Increased the typo list,
 23.2634 +           added paranoid mode,
 23.2635 +           ANSIfied the code and added some casts
 23.2636 +              so the compiler wouldn't keep asking if I knew what I was doing,
 23.2637 +           fixed bug in l.s.d. condition (thanks, Dave!),
 23.2638 +           standardized spacing when echoing,
 23.2639 +           added letter-combo checking code to typo section,
 23.2640 +           added more h/b words to typo array.
 23.2641 +           Not too sure about putting letter combos outside of the TYPO conditions -
 23.2642 +           someone is sure to have a book about the tbaka tribe, or something. Anyway, let's see.
 23.2643 +           Released as .12
 23.2644 +
 23.2645 +           ---------------
 23.2646 +
 23.2647 +  06/01/01 Removed duplicate reporting of Tildes, asterisks, etc.
 23.2648 +  06/10/01 Added flgets routine to help with platform-independent
 23.2649 +           detection of invalid line-ends. All PG text files should
 23.2650 +           have CR/LF (13/10) at end of line, regardless of system.
 23.2651 +           Gutcheck now validates this by default. (Thanks, Charles!)
 23.2652 +           Released as .13
 23.2653 +
 23.2654 +           ---------------
 23.2655 +
 23.2656 +  06/11/01 Added parenthesis match checking. (c_brack, cbrack_err etc.)
 23.2657 +           Released as .14
 23.2658 +
 23.2659 +           ---------------
 23.2660 +
 23.2661 +  06/23/01 Fixed: 'No',he said. not being flagged.
 23.2662 +
 23.2663 +           Improved: better single-quotes checking:
 23.2664 +
 23.2665 +           Ignore singlequotes surrounded by alpha, like didn't. (was OK)
 23.2666 +
 23.2667 +           If a singlequote is at the END of a word AND the word ends in "s":
 23.2668 +                  The dogs' tails wagged.
 23.2669 +           it's probably an apostrophe, but less commonly may be a closequote:
 23.2670 +                  "These 'pack dogs' of yours look more like wolves."
 23.2671 +
 23.2672 +           If it's got punctuation before it and is followed by a space
 23.2673 +           or punctuation:
 23.2674 +              . . . was a problem,' he said
 23.2675 +              . . . was a problem,'"
 23.2676 +           it is probably (certainly?) a closequote.
 23.2677 +
 23.2678 +           If it's at start of paragraph, it's probably an openquote.
 23.2679 +              (but watch dialect)
 23.2680 +
 23.2681 +           Words with ' at beginning and end are probably quoted:
 23.2682 +               "You have the word 'chivalry' frequently on your lips."
 23.2683 +               (Not specifically implemented)
 23.2684 +           V.18 I'm glad I didn't implement this, 'cos it jest ain't so
 23.2685 +           where the convention is to punctuate outside the quotes.
 23.2686 +               'Come', he said, 'and join the party'.
 23.2687 +
 23.2688 +           If it is followed by an alpha, and especially a capital:
 23.2689 +              'Hello,' called he.
 23.2690 +           it is either an openquote or dialect.
 23.2691 +
 23.2692 +           Dialect breaks ALL the rules:
 23.2693 +                  A man's a man for a' that.
 23.2694 +                  "Aye, but 'tis all in the pas' now."
 23.2695 +                  "'Tis often the way," he said.
 23.2696 +                  'Ave a drink on me.
 23.2697 +
 23.2698 +           This version looks to be an improvement, and produces
 23.2699 +           fewer false positives, but is still not perfect. The
 23.2700 +           'pack dogs' case still fools it, and dialect is still
 23.2701 +           a problem. Oh, well, it's an improvement, and I have
 23.2702 +           a weighted structure in place for refining guesses at
 23.2703 +           closequotes. Maybe next time, I'll add a bit of logic
 23.2704 +           where if there is an open quote and one that was guessed
 23.2705 +           to be a possessive apostrophe after s, I'll re-guess it
 23.2706 +           to be a closequote. Let's see how this one flies, first.
 23.2707 +
 23.2708 +           (Afterview: it's still crap. Needs much work, and a deeper insight.)
 23.2709 +
 23.2710 +           Released as .15
 23.2711 +
 23.2712 +           TODO: More he/be checks. Can't be perfect - counterexamples:
 23.2713 +              I gave my son good advice: be married regardless of the world's opinion.
 23.2714 +              I gave my son good advice: he married regardless of the world's opinion.
 23.2715 +
 23.2716 +              If by "primitive" be meant "crude", we can understand the sentence.
 23.2717 +              If by "primitive" he meant "crude", we can understand the sentence.
 23.2718 +
 23.2719 +              No matter what be said, I must go on.
 23.2720 +              No matter what he said, I must go on.
 23.2721 +
 23.2722 +              No value, however great, can be set upon them.
 23.2723 +              No value, however great, can he set upon them.
 23.2724 +
 23.2725 +              Real-Life one from a DP International Weekly Miscellany:
 23.2726 +                He wandered through the forest without fear, sleeping
 23.2727 +                much, for in sleep be had companionship--the Great
 23.2728 +                Spirit teaching him what he should know in dreams.
 23.2729 +                That one found by jeebies, and it turned out to be "he".
 23.2730 +
 23.2731 +
 23.2732 +           ---------------
 23.2733 +
 23.2734 +  07/01/01 Added -O option.
 23.2735 +           Improved singlequotes by reporting mismatched single quotes
 23.2736 +           only if an open_single_quotes was found.
 23.2737 +
 23.2738 +           Released as .16
 23.2739 +
 23.2740 +           ---------------
 23.2741 +
 23.2742 +  08/27/01 Added -Y switch for Robert Rowe to allow his app to
 23.2743 +           catch the error output.
 23.2744 +
 23.2745 +           Released as .17
 23.2746 +
 23.2747 +           ---------------
 23.2748 +
 23.2749 +  09/08/01 Added checking Capitals at start of paragraph, but not
 23.2750 +           checking them at start of sentence.
 23.2751 +
 23.2752 +           TODO: Parse sentences out so can check reliably for start of
 23.2753 +                 sentence. Need a whole different approach for that.
 23.2754 +                 (Can't just rely on periods, since they are also
 23.2755 +                 used for abbreviations, etc.)
 23.2756 +
 23.2757 +           Added checking for all vowels or all consonants in a word.
 23.2758 +
 23.2759 +           While I was in, I added "ii" checking and "tl" at start of word.
 23.2760 +
 23.2761 +           Added echoing of first line of paragraph when reporting
 23.2762 +           mismatched quoted or brackets (thanks to David Widger for the
 23.2763 +           suggestion)
 23.2764 +
 23.2765 +           Not querying L at start of a number (used for British pounds).
 23.2766 +
 23.2767 +           The spelling changes are sort of half-done but released anyway
 23.2768 +           Skipped .18 because I had given out a couple of test versions
 23.2769 +           with that number.
 23.2770 +
 23.2771 +  09/25/01 Released as .19
 23.2772 +
 23.2773 +           ---------------
 23.2774 +
 23.2775 +           TODO:
 23.2776 +           Use the logic from my new version of safewrap to stop querying
 23.2777 +             short lines like poems and TOCs.
 23.2778 +           Ignore non-standard ellipses like .  .  . or ...
 23.2779 +
 23.2780 +
 23.2781 +           ---------------
 23.2782 +  10/01/01 Made any line over 80 a VERY long line (was 85).
 23.2783 +           Recognized openquotes on indented paragraphs as continuations
 23.2784 +               of the same speech.
 23.2785 +           Added "cf" to the okword list (how did I forget _that_?) and a few others.
 23.2786 +           Moved abbrev to okword and made it more general.
 23.2787 +           Removed requirement that PG_space_emdash be greater than
 23.2788 +               ten before turning off warnings about spaced dashes.
 23.2789 +           Added period to list of characters that might constitute a separator line.
 23.2790 +           Now checking for double punctuation (Thanks, David!)
 23.2791 +           Now if two spaced em-dashes on a line, reports both. (DW)
 23.2792 +           Bug: Wasn't catching spaced punctuation at line-end since I
 23.2793 +               added flgets in version .13 - fixed.
 23.2794 +           Bug: Wasn't catching spaced singlequotes - fixed
 23.2795 +           Now reads punctuated numbers like 1,000 as a single word.
 23.2796 +               (Used to give "standalone 1" type  queries)
 23.2797 +           Changed paranoid mode - not including s and p options. -ex is now quite usable.
 23.2798 +           Bug: was calling `"For it is perfectly impossible,"    Unspaced Quotes - fixed
 23.2799 +           Bug: Sometimes gave _next_ line number for queried word at end of line - fixed
 23.2800 +
 23.2801 +  10/22/01 Released as .20
 23.2802 +
 23.2803 +           ---------------
 23.2804 +
 23.2805 +           Added count of lines with spaces at end. (cnt_spacend) (Thanks, Brett!)
 23.2806 +           Reduced the number of hi-bit letters needed to stop reporting them
 23.2807 +               from 1/20 to 1/100 or 200 in total.
 23.2808 +           Added PG footer check.
 23.2809 +           Added the -h switch.
 23.2810 +           Fixed platform-specific CHAR_EOL checking for isemptyline - changed to 13 and 10
 23.2811 +           Not reporting ".," when there are many of them, such as a book with many references to "Vol 1., p. 23"
 23.2812 +           Added unspaced brackets check when surrounded by alpha.
 23.2813 +           Removed all typo reporting unless the typo switch is on.
 23.2814 +           Added gcisalpha to ease over-reporting of 8-bit queries.
 23.2815 +           ECHO_SWITCH is now ON by default!
 23.2816 +           PARANOID_SWITCH is now ON by default!
 23.2817 +           Checking for ">From" placed there by e-mail MTA (Thanks Andrew & Greg)
 23.2818 +           Checking for standalone lowercase "l"
 23.2819 +           Checking for standalone lowercase "s"
 23.2820 +           Considering "is be" and "be is" "be was" "was be" as he/be errors
 23.2821 +           Looking at punct at end of para
 23.2822 +
 23.2823 +  01/20/02 Released as .21
 23.2824 +
 23.2825 +           ---------------
 23.2826 +
 23.2827 +           Added VERBOSE_SWITCH to make it list everything. (George Davis)
 23.2828 +
 23.2829 +           ---------------
 23.2830 +
 23.2831 +  02/17/02 Added cint in flgets to try fix an EOF failure on a compiler I don't have.
 23.2832 +           after which
 23.2833 +           This line caused a coredump on Solaris - fixed.
 23.2834 +                Da sagte die Figur: " Das ist alles gar schoen, und man mag die Puppe
 23.2835 +  03/09/02 Changed header recognition for another header change
 23.2836 +           Called it .24
 23.2837 +  03/29/02 Added qword[][] so I can suppress massive overreporting
 23.2838 +           of queried "words" like "FN", "Wm.", "th'", people's 
 23.2839 +           initials, chemical formulae and suchlike in some texts.
 23.2840 +           Called it .25
 23.2841 +  04/07/02 The qword summary reports at end shouldn't show in OVERVIEW mode. Fixed.
 23.2842 +           Added linecounts in overview mode.
 23.2843 +           Wow! gutcheck gutcheck.exe doesn't report a binary! :-) Need to tighten up. Done.
 23.2844 +           "m" is a not uncommon scanno for "in", but also appears in "a.m." - Can I get round that?
 23.2845 +  07/07/02 Added GPL.
 23.2846 +           Added checking for broken em-dash at line-end (enddash)
 23.2847 +           Released as 0.95
 23.2848 +  08/17/02 Fixed a bug that treated some hi-bit characters as spaces. Thanks, Carlo.
 23.2849 +           Released as 0.96
 23.2850 +  10/10/02 Suppressing some annoying multiple reports by default:
 23.2851 +           Standalone Ones, Asterisks, Square Brackets.
 23.2852 +              Digit 1 occurs often in many scientific texts.
 23.2853 +              Asterisk occurs often in multi-footnoted texts.
 23.2854 +              Mismatch Square Brackets occurs often in multi-para footnotes.
 23.2855 +           Added -m switch for Charlz. Horrible. Nasty. Kludgy. Evil.
 23.2856 +              . . . but it does more or less work for the main cases.
 23.2857 +           Removed uppercase within a word as a separate category so
 23.2858 +           that names like VanAllen get reported only once, like other
 23.2859 +           suspected typos.
 23.2860 +  11/24/02 Fixed - -m switch wasn't looking at htmlnum in
 23.2861 +           loseentities (Thanks, Brett!)
 23.2862 +           Fixed bug which occasionally gave false warning of
 23.2863 +           paragraph starting with lowercase.
 23.2864 +           Added underscore as character not to query around doublequotes.
 23.2865 +           Split the "Non-ASCII" message into "Non-ASCII" vs. "Non-ISO-8859"
 23.2866 +           . . . this is to help detect things like CP1252 characters.
 23.2867 +           Released as 0.97
 23.2868 +
 23.2869 +  12/01/02 Hacked a simplified version of the "Wrongspaced quotes" out of gutspell,
 23.2870 +           for doublequotes only. Replaces "Spaced quote", since it also covers that
 23.2871 +           case.
 23.2872 +           Added "warn_hyphen" to ease over-reporting of hyphens.
 23.2873 +
 23.2874 +  12/20/02 Added "extra period" checks.
 23.2875 +           Added single character line check
 23.2876 +           Added I" check - is usually an exclam
 23.2877 +           Released as 0.98
 23.2878 +
 23.2879 +  1/5/03   Eeek! Left in a lowerit(argv[0]) at the start before procfile()
 23.2880 +           from when I was looking at ways to identify markup. Refuses to
 23.2881 +           open files for *nix users with upcase in the filenames. Removed.
 23.2882 +           Fixed quickly and released as 0.981
 23.2883 +
 23.2884 +  1/8/03   Added "arid" to the list of typos, slightly against my better
 23.2885 +           judgement, but the DP gang are all excited about it. :-)
 23.2886 +           Added a check for comma followed by capital letter, where
 23.2887 +           a period has OCRed into a comma. (DW). Not sure about this
 23.2888 +           either; we'll see.
 23.2889 +           Compiling for Win32 to allow longfilenames.
 23.2890 +
 23.2891 +  6/1/04   A messy test release for DW to include the "gutcheck.typ"
 23.2892 +           process. And the gutcheck.jee trials. Removed "arid" --
 23.2893 +           it can go in gutcheck.typ
 23.2894 +
 23.2895 +           Added checks for carets ^ and slants / but disabling slant
 23.2896 +           queries if more than 20 of them, because some people use them
 23.2897 +           for /italics/. Slants are commonly mistaken italic "I"s.
 23.2898 +
 23.2899 +           Later: removed gutcheck.jee -- wrote jeebies instead.
 23.2900 +
 23.2901 +Random TODO: 
 23.2902 +           Check brackets more closely, like quotes, so that it becomes
 23.2903 +           easy to find the error in long paragraphs full of brackets.
 23.2904 +
 23.2905 +
 23.2906 +  11/4/04  Assorted cleanup. Fixed case where text started with an
 23.2907 +           unbalanced paragraph.
 23.2908 +
 23.2909 +  1/2/05   Has it really been that long? Added "nocomma", "noperiod" check.
 23.2910 +           Bits and pieces: improved isroman(). Added isletter().
 23.2911 +           Other stuff I never noted before this.
 23.2912 +
 23.2913 +  7/3/05   Stuck in a quick start on DP-markup ignoring 
 23.2914 +           at BillFlis's suggestion.
 23.2915 +
 23.2916 +  1/23/06  Took out nocomma etc if typos are off. Why did I ever leave that in?
 23.2917 +           Don't count footer for dotcomma etc.
 23.2918 +
 23.2919 +
 23.2920 +1       I
 23.2921 +ail     all
 23.2922 +arc     are
 23.2923 +arid    and
 23.2924 +bad     had
 23.2925 +ball    hall
 23.2926 +band    hand
 23.2927 +bar     her
 23.2928 +bat     but
 23.2929 +be      he
 23.2930 +bead    head
 23.2931 +beads   heads
 23.2932 +bear    hear
 23.2933 +bit     hit
 23.2934 +bo      be
 23.2935 +boon    been
 23.2936 +borne   home
 23.2937 +bow     how
 23.2938 +bumbled humbled
 23.2939 +car     ear
 23.2940 +carnage carriage
 23.2941 +carne   came
 23.2942 +cast    east
 23.2943 +cat     cut
 23.2944 +cat     eat
 23.2945 +cheek   check
 23.2946 +clay    day
 23.2947 +coining coming
 23.2948 +comer   corner
 23.2949 +die     she
 23.2950 +docs    does
 23.2951 +ease    case
 23.2952 +fail    fall
 23.2953 +fee     he
 23.2954 +haying  having
 23.2955 +ho      he
 23.2956 +ho      who
 23.2957 +hut     but
 23.2958 +is      as
 23.2959 +lie     he
 23.2960 +lime    time
 23.2961 +loth    10th
 23.2962 +m       in
 23.2963 +modem   modern
 23.2964 +Ms      his
 23.2965 +ray     away
 23.2966 +ray     my
 23.2967 +ringer  finger
 23.2968 +ringers fingers
 23.2969 +rioted  noted
 23.2970 +tho     the
 23.2971 +tie     he
 23.2972 +tie     the
 23.2973 +tier    her
 23.2974 +tight   right
 23.2975 +tile    the
 23.2976 +tiling  thing
 23.2977 +tip     up
 23.2978 +tram    train
 23.2979 +tune    time
 23.2980 +u       "
 23.2981 +wen     well
 23.2982 +yon     you
 23.2983 +
 23.2984 +*********************************************************************/
 23.2985 +

    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/bookloupe/bookloupe.typ.in	Fri Jan 27 10:30:16 2012 +0000
    24.3 @@ -0,0 +1,54 @@
    24.4 +11
    24.5 +44
    24.6 +ms
    24.7 +ail
    24.8 +alien
    24.9 +arc
   24.10 +arid
   24.11 +bar
   24.12 +bat
   24.13 +bo
   24.14 +borne
   24.15 +bow
   24.16 +bum
   24.17 +bumbled
   24.18 +carnage
   24.19 +carne
   24.20 +cither
   24.21 +coining
   24.22 +comer
   24.23 +cur
   24.24 +docs
   24.25 +eve
   24.26 +eves
   24.27 +gaming
   24.28 +gram
   24.29 +guru
   24.30 +hag
   24.31 +hare
   24.32 +haying
   24.33 +ho
   24.34 +lime
   24.35 +loth
   24.36 +m
   24.37 +modem
   24.38 +nave
   24.39 +ringer
   24.40 +ringers
   24.41 +riot
   24.42 +rioted
   24.43 +signer
   24.44 +snore
   24.45 +spam
   24.46 +tho
   24.47 +tier
   24.48 +tile
   24.49 +tiling
   24.50 +tram
   24.51 +tum
   24.52 +tune
   24.53 +u
   24.54 +vas
   24.55 +wag
   24.56 +wen
   24.57 +yon

    25.1 --- a/configure.ac	Fri Jan 27 00:28:11 2012 +0000
    25.2 +++ b/configure.ac	Fri Jan 27 10:30:16 2012 +0000
    25.3 @@ -1,13 +1,13 @@
    25.4  #                                               -*- Autoconf -*-
    25.5  # Process this file with autoconf to produce a configure script.
    25.6  
    25.7 -AC_INIT([gutcheck],[1.50],[ali@juiblex.co.uk])
    25.8 +AC_INIT([bookloupe],[1.50],[ali@juiblex.co.uk])
    25.9  AC_PREREQ(2.59)
   25.10  AC_CONFIG_AUX_DIR([config])
   25.11 -AC_CONFIG_SRCDIR([gutcheck/gutcheck.c])
   25.12 +AC_CONFIG_SRCDIR([bookloupe/bookloupe.c])
   25.13  AC_CONFIG_FILES([Makefile
   25.14 -gclib/Makefile
   25.15 -gutcheck/Makefile
   25.16 +bl/Makefile
   25.17 +bookloupe/Makefile
   25.18  test/Makefile
   25.19  test/harness/Makefile
   25.20  test/compatibility/Makefile

    26.1 --- a/doc/Makefile.am	Fri Jan 27 00:28:11 2012 +0000
    26.2 +++ b/doc/Makefile.am	Fri Jan 27 10:30:16 2012 +0000
    26.3 @@ -1,3 +1,3 @@
    26.4 -dist_pkgdata_DATA=gutcheck.txt gc-test.txt
    26.5 +dist_pkgdata_DATA=bookloupe.txt loupe-test.txt
    26.6  
    26.7  EXTRA_DIST=README-0.99

    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/doc/bookloupe.txt	Fri Jan 27 10:30:16 2012 +0000
    27.3 @@ -0,0 +1,742 @@
    27.4 +
    27.5 +
    27.6 +                            Gutcheck documentation
    27.7 +
    27.8 +
    27.9 +gutcheck:  lists possible common formatting errors in a Project
   27.10 +Gutenberg candidate file. It is a command line program and can be used
   27.11 +under Win32 or Unix (gutcheck.c should compile anywhere; if it doesn't,
   27.12 +tell me). For Windows-only people, there is an appendix at the end
   27.13 +with brief instructions for running it.
   27.14 +
   27.15 +
   27.16 +Current version: 0.99. Users of 0.98 see end of file for changes.
   27.17 +
   27.18 +You should also have received the licence file COPYING, a README file, 
   27.19 +gutcheck.c, the source code, and gutcheck.exe, a DOS executable, with
   27.20 +this file.
   27.21 +
   27.22 +This software is Copyright Jim Tinsley 2000-2005.
   27.23 +
   27.24 +Gutcheck comes with ABSOLUTELY NO WARRANTY. For details, read the file COPYING.
   27.25 +This is Free Software; you may redistribute it under certain conditions (GPL).
   27.26 +
   27.27 +See http://gutcheck.sourceforge.net for the latest version.
   27.28 +
   27.29 +
   27.30 +Usage is: gutcheck [-setopxlywm] filename
   27.31 +      where:
   27.32 +      -s checks Single quotes 
   27.33 +      -e switches off Echoing of lines 
   27.34 +      -t checks Typos
   27.35 +      -o produces an Overview only
   27.36 +      -p sets strict quotes checking for Paragraphs
   27.37 +      -x (paranoid) switches OFF typo checking and extra checks
   27.38 +      -l turns off Line-end checks
   27.39 +      -y sets error messages to stdout
   27.40 +      -w is a special mode for web uploads (for future use)
   27.41 +      -v (verbose) forces individual reporting of minor problems
   27.42 +      -m interprets Markup of some common HTML tags and entities    
   27.43 +      -u warns about words in a user-defined typo file gutcheck.typ 
   27.44 +      -d ignores some DP-specific markup
   27.45 +
   27.46 +Running gutcheck without any parameters will display a brief help message.
   27.47 +
   27.48 +Sample usage: 
   27.49 +
   27.50 +    gutcheck warpeace.txt
   27.51 +
   27.52 +
   27.53 +More detail:
   27.54 +
   27.55 +    Echoing lines (-e to switch off)
   27.56 +
   27.57 +      You may find it convenient, when reviewing Gutcheck's 
   27.58 +      suggestions, to see the line that Gutcheck is questioning.
   27.59 +      That way, you can often see at a glance whether it is
   27.60 +      a real error that needs to be fixed, or a false positive
   27.61 +      that should be in the text, but Gutcheck's limited
   27.62 +      programming doesn't understand.
   27.63 +
   27.64 +      By default, gutcheck echoes these lines, but if you don't 
   27.65 +      want to see the lines referred to, -e will switch it OFF.
   27.66 +
   27.67 +
   27.68 +    Quotes (-s and -p switches)
   27.69 +
   27.70 +      Gutcheck always looks for unbalanced doublequotes in a 
   27.71 +      paragraph. It is a common convention for writers not to
   27.72 +      close quotes in a paragraph if the next paragraph opens
   27.73 +      with quotes and is a continuation by the same speaker.
   27.74 +
   27.75 +      Gutcheck therefore does not normally report unclosed quotes 
   27.76 +      if the next paragraph begins with a quote. If you need
   27.77 +      to see all unclosed quotes, even where the next paragraph
   27.78 +      begins with a quote, you should use the -p switch.
   27.79 +
   27.80 +      Singlequotes (') are a problem, since the same character
   27.81 +      is used for an apostrophe. I'm not sure that it is 
   27.82 +      possible to get 100% accuracy on singlequotes checking,
   27.83 +      particularly since dialect, quite common in PG texts,
   27.84 +      upsets the normal rules so badly. Consider the sentence:
   27.85 +        'Tis often said that a man's a man for a' that.
   27.86 +      As humans, we recognize that both apostrophes are used
   27.87 +      for contractions rather than quotes, but it isn't easy 
   27.88 +      to get a program to recognize that.
   27.89 +
   27.90 +      Since Gutcheck makes too many mistakes when trying to match
   27.91 +      singlequotes, it doesn't look for unbalanced singlequotes
   27.92 +      unless you specify the -s switch.
   27.93 +
   27.94 +      Consider these sentences, which illustrate the main cases:
   27.95 +
   27.96 +        'Tis often said that a fool and his money are soon parted.
   27.97 +
   27.98 +        'Becky's goin' home,' said Tom.
   27.99 +
  27.100 +        The dogs' tails wagged in unison.
  27.101 +
  27.102 +        Those 'pack dogs' of yours look more like wolves.
  27.103 +
  27.104 +
  27.105 +
  27.106 +    Typos (-t switch)
  27.107 +
  27.108 +      It's not Gutcheck's job to be a spelling checker, but it
  27.109 +      does check for a list of common typos and OCR errors if you
  27.110 +      use the -t switch. (The -x switch turns typo checking off; see below.)
  27.111 +
  27.112 +      It also checks for character combinations, especially involving
  27.113 +      h and b, which are often confused by OCR, that rarely or never
  27.114 +      occur. For example, it queries "tbe" in a word. Now, "the" often
  27.115 +      occurs, but "tbe" is very rare (heartbeat, hotbed), so I'm
  27.116 +      playing the odds - a few false positives for many errors found.
  27.117 +      Similarly with "ii", which is a very common OCR error.
  27.118 +
  27.119 +      Gutcheck suppresses multiple reporting of the first 40 "typos"
  27.120 +      found. This is to remove the annoyance of seeing something like
  27.121 +      "FN" (footnote) or "LK" (initials) flagged as a typo 147 times
  27.122 +      in a text. 
  27.123 +
  27.124 +
  27.125 +    Line-end checking (-l switch to disable)
  27.126 +
  27.127 +      All PG texts should have a Carriage Return (CR - character 13)
  27.128 +      and a Line Feed (LF - character 10) at end of each line,
  27.129 +      regardless of what O/S you made them on. DOS/Windows, Unix
  27.130 +      and Mac have different conventions, but the final text should
  27.131 +      always use a CR/LF pair as its line terminator.
  27.132 +
  27.133 +      By default, Gutcheck verifies that every line does have
  27.134 +      the correct terminator, but if you're on a work-in-progress
  27.135 +      in Linux, you might want to convert the line-ends as a final
  27.136 +      step, and not want to see thousands of errors every time you
  27.137 +      run Gutcheck before that final step, so you can turn off 
  27.138 +      this checking with the -l switch.
  27.139 +
  27.140 +
  27.141 +    Paranoid mode (-x switch to disable: Trust No One :-)
  27.142 +
  27.143 +      -x automatically switches OFF typo-checking (the -t flag)
  27.144 +      and some extra checks like standalone 1 and 0 queries.
  27.145 +
  27.146 +
  27.147 +    Overview mode (-o switch)
  27.148 +
  27.149 +       This mode just gives a count of queries found
  27.150 +       instead of a detailed list.
  27.151 +
  27.152 +
  27.153 +    Header quote  (-h switch)
  27.154 +
  27.155 +       If you use the -h switch, gutcheck will also display
  27.156 +       the Title, Author, Release and Edition fields from the
  27.157 +       PG header. This is useful mostly for the automated
  27.158 +       checks we do on recently-posted texts.
  27.159 +
  27.160 +
  27.161 +    Errors to stdout (-y switch)
  27.162 +
  27.163 +       If you're just running gutcheck normally, you can ignore
  27.164 +       this. It's only there for programs that provide a front
  27.165 +       end to gutcheck. It makes error messages appear within
  27.166 +       the output of gutcheck so that the front end knows whether
  27.167 +       gutcheck ran OK.
  27.168 +
  27.169 +
  27.170 +    Verbose reporting (-v switch)
  27.171 +
  27.172 +       Normally, if gutcheck sees lots of long lines, short lines,
  27.173 +       spaced dashes, non-ASCII characters or dot-commas ".," it
  27.174 +       assumes these are features of the text, counts and summarizes
  27.175 +       them at the top of its report, but does not list them 
  27.176 +       individually. If the -v switch is on, gutcheck will list them all.
  27.177 +
  27.178 +
  27.179 +    Markup interpretation (-m switch)
  27.180 +
  27.181 +       Normally, gutcheck flags anything it suspects of being HTML
  27.182 +       markup as a possible error. When you use the -m switch,
  27.183 +       however, it matches anything that looks like markup against
  27.184 +       a short list of common HTML tags and entities. If the markup
  27.185 +       is in that list, it either ignores the markup, in the case
  27.186 +       of a tag, or "interprets" the markup as its nearest ASCII 
  27.187 +       equivalent, in the case of an entity. So, for example, using
  27.188 +       this switch, gutcheck will "see"
  27.189 +
  27.190 +       &ldquo;He went <i>thataway!</i>&rdquo;
  27.191 +
  27.192 +       as
  27.193 +
  27.194 +       "He went thataway!"
  27.195 +
  27.196 +       and report accordingly.
  27.197 +
  27.198 +       This switch does not, not, NOT check the validity of HTML;
  27.199 +       it exists so that you can run gutcheck on most HTML texts
  27.200 +       for PG, and get sane results. It does not support all tags.
  27.201 +       It does not support all entities. When it sees a tag or entity
  27.202 +       it does not recognize, it will query it as HTML just as if
  27.203 +       you hadn't specified the -m switch.
  27.204 +
  27.205 +       Gutcheck 0.99 will automatically switch on markup interpretation
  27.206 +       if it sees a lot of tags that appear to be markup, so mostly, you
  27.207 +       won't have to specify this.
  27.208 +
  27.209 +    User-defined typos (-u switch)
  27.210 +
  27.211 +        If you have a file named gutcheck.typ either in your current
  27.212 +        working directory or in the directory from which you explicitly
  27.213 +        invoked gutcheck, but not necessarily on your path, and if you
  27.214 +        specify the -u switch, gutcheck will query any word specified 
  27.215 +        in that file. The file is simple: one word, in lower case, per
  27.216 +        line. 999 lines are allowed for. Be careful not to put multiple
  27.217 +        words onto a line, or leave any rubbish other than the word on
  27.218 +        the line. You should have received a sample file gutcheck.typ
  27.219 +        with this package.
  27.220 +
  27.221 +    Ignore DP markup (-d switch)
  27.222 +        
  27.223 +        Distributed Proofreaders (http://www.pgdp.net) is currently
  27.224 +        (2005) the main source of PG texts, and proofers there use
  27.225 +        special conventions. This switch understands those conventions,
  27.226 +        so that people can use gutcheck on files in process that still
  27.227 +        haven't had the special conventions removed yet. The special
  27.228 +        conventions supported in 0.99 are page-separators and
  27.229 +        "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/".
  27.230 +
  27.231 +
  27.232 +You will probably only run gutcheck on a text once or maybe twice,
  27.233 +just prior to uploading; it usually finds a few formatting problems;
  27.234 +it also usually finds queries that aren't problems at all - it often
  27.235 +questions Tables of Contents for having short lines, for example.
  27.236 +These are called "false positives", and need a human to decide on
  27.237 +them.
  27.238 +
  27.239 +The text should be standard prose, and already close to PG normal
  27.240 +format (plain text, about 70 characters per line with blank lines
  27.241 +between paragraphs).
  27.242 +
  27.243 +Gutcheck merely draws your attention to things that might be errors.
  27.244 +It is NOT a substitute for human judgement. Formatting choices like
  27.245 +short lines may be for a reason that this program can't understand.
  27.246 +
  27.247 +Even the most careful human proofing can leave errors behind in a
  27.248 +text, and there are several automated checks you can do to help find
  27.249 +them. Of these, spellchecking (with _very_ careful human judgement) is
  27.250 +the most important and most useful.
  27.251 +
  27.252 +Gutcheck does perform some basic typo-checking if you ask it to,
  27.253 +but its focus is on formatting errors specific to PG texts - 
  27.254 +mismatched quotes, non-ASCII characters, bad spacing, bad line
  27.255 +length, HTML tags perhaps left from a conversion, unbalanced
  27.256 +brackets.
  27.257 +
  27.258 +Suggestions for additional checks would be appreciated and duly 
  27.259 +considered, but no guarantees that they will be implemented.
  27.260 +
  27.261 +
  27.262 +
  27.263 +
  27.264 +                How do _I_ use it?
  27.265 +
  27.266 +Practically everyone I give gutcheck to asks me how _I_ use it.
  27.267 +Well, when I get a text for posting, say filename.txt, I run
  27.268 +
  27.269 +    gutcheck -o filename.txt
  27.270 +
  27.271 +That gives me a quick idea what I'm dealing with. It'll tell
  27.272 +me what kind of problems gutcheck sees, and give me an idea 
  27.273 +of how much more work needs to be done on the text. Keep in 
  27.274 +mind that gutcheck doesn't do anything like a full spellcheck,
  27.275 +but when I see a text that has a lot of problems, I assume that
  27.276 +it probably needs a spellcheck too.
  27.277 +
  27.278 +Having got a feel for the ballpark, I run
  27.279 +
  27.280 +    gutcheck filename.txt > jj
  27.281 +
  27.282 +where jj is my personal, all-purpose filename for temporary data
  27.283 +that doesn't need to be kept. Then I open filename.txt and jj in
  27.284 +a split-screen view in my editor, and work down the text, fixing
  27.285 +whatever needs fixing, and skipping whatever doesn't. If your 
  27.286 +editor doesn't split-screen, you can get much the same effect by 
  27.287 +opening your original file in your normal editor, and jj (or your
  27.288 +equivalent name) in something like Notepad, keeping both in view 
  27.289 +at the same time.
  27.290 +
  27.291 +Twice a day, an automatic process looks at all recently-posted
  27.292 +texts, and emails Michael, me, and sometimes other people with
  27.293 +their gutcheck summaries.
  27.294 +
  27.295 +
  27.296 +
  27.297 +        Future development of gutcheck
  27.298 +
  27.299 +Gutcheck has gone about as far as it can, given its current
  27.300 +structure. In order to add better singlequotes checking,
  27.301 +sentence checking, better he/be checking and other good stuff
  27.302 +that I'd like to see, I'll have to rewrite it from a different
  27.303 +angle - looking at the syntax instead of the lines. And I'll
  27.304 +probably get around to that sooner or later.
  27.305 +
  27.306 +Meantime, I'm just trying to get this version stabilized, so
  27.307 +please report any bugs you find. When it is stable, I'll run
  27.308 +up a Windows port for those timid souls who can't look a 
  27.309 +command line in the eye. :-)
  27.310 +
  27.311 +And I've started work on gutspell, a companion to gutcheck
  27.312 +which will concentrate on spelling problems. PG spelling
  27.313 +problems are unusual, since the range of texts we cover is
  27.314 +so wide, and I'll be taking a somewhat unorthodox approach
  27.315 +to writing this spelling-checker _specifically_ for texts
  27.316 +containing a lot of dialect and uncommon words that have
  27.317 +probably already been spell-checked against a standard
  27.318 +modern dictionary.
  27.319 +
  27.320 +
  27.321 +
  27.322 +
  27.323 +Explanations of common gutcheck messages:
  27.324 +
  27.325 +    --> 74 lines in this file have white space at end
  27.326 +
  27.327 +    PG texts shouldn't have extra white space added at end of line.
  27.328 +    Don't worry too much about this; they're not doing any harm,
  27.329 +    and they'll be removed during posting anyway.
  27.330 +
  27.331 +
  27.332 +    --> 348 lines in this file are short. Not reporting short lines.
  27.333 +    --> 84 lines in this file are long. Not reporting long lines.
  27.334 +    --> 8 lines in this file are VERY long!
  27.335 +
  27.336 +    If there are a lot of long or short lines, Gutcheck won't list
  27.337 +    them individually. The short lines version of this message
  27.338 +    is commonly seen when gutchecking poetry and some plays, where
  27.339 +    the normal line length is shorter than the standard for prose.
  27.340 +    A "VERY long" line is one over 80 characters.  You normally
  27.341 +    shouldn't have any of these, but sometimes you may have to render
  27.342 +    a table that must be that long, or some special preformatted
  27.343 +    quotation that can't be broken.
  27.344 +
  27.345 +
  27.346 +    --> There are 75 spaced dashes and em-dashes in this file. Not reporting them.
  27.347 +
  27.348 +    The PG standard for an emdash--like these--is two minus signs
  27.349 +    with no spaces before or after them. However, some older texts
  27.350 +    used spaced dashes - like these -- and if there are very many
  27.351 +    such spaced dashes in the file, gutcheck just draws your
  27.352 +    attention to it and doesn't list them individually.
  27.353 +
  27.354 +
  27.355 +
  27.356 +    Line 3020 - Non-ASCII character 233
  27.357 +
  27.358 +    Standard PG texts should use only ASCII characters with values
  27.359 +    up to 127; however, non-English, accented characters can be 
  27.360 +    represented according to several different non-ASCII encoding 
  27.361 +    schemes, using values over 127. If you have a plain English text
  27.362 +    with a few accented characters in words like cafe or tete-a-tete,
  27.363 +    you should replace the accented characters with their unaccented 
  27.364 +    versions. The English pound sign is another commonly-seen
  27.365 +    non-ASCII character. If you have enough non-ASCII characters in
  27.366 +    your text that you feel removing them would degrade your text
  27.367 +    unacceptably, you should probably consider doing an 8-bit text
  27.368 +    as well as a plain-ASCII version.
  27.369 +
  27.370 +
  27.371 +
  27.372 +    Line 1207 - Non-ISO-8859 character 156
  27.373 +
  27.374 +    Even in "8-bit" texts, there are distinctions between code sets.
  27.375 +    The ISO-8859 family of 8-bit code sets is the most commonly used
  27.376 +    in PG, and these sets do not define values in the range 128 through
  27.377 +    159 as printable characters. It's quite common for someone on a
  27.378 +    Windows or Mac machine to use a non-ISO character inadvertently,
  27.379 +    so this message warns that the character is not only not ASCII,
  27.380 +    but also outside the ISO-8859 range.
  27.381 +
  27.382 +
  27.383 +
  27.384 +    Line 46 - Tab character?
  27.385 +
  27.386 +    Some editors and WPs will put in Tab characters (character 9) to
  27.387 +    indicate indented text. You should not use these in a PG text,
  27.388 +    because you can't be sure how they will appear on a reader's
  27.389 +    screen. Find the Tab, and replace it with the appropriate number
  27.390 +    of spaces.
  27.391 +
  27.392 +
  27.393 +    Line 1327 - Tilde character?
  27.394 +
  27.395 +    The tilde character (~) might be legitimately used, but it's the
  27.396 +    character commonly used by OCR software to indicate a place where
  27.397 +    it couldn't make out the letter, so gutcheck flags it.
  27.398 +
  27.399 +
  27.400 +
  27.401 +    Line 1347 - Asterisk?
  27.402 +
  27.403 +    Asterisks are reported only in paranoid mode (see -x). 
  27.404 +    Like tildes, they are often used to indicate errors, but they are
  27.405 +    also legitimately used as line delimiters and footnote markers.
  27.406 +
  27.407 +
  27.408 +
  27.409 +    Line 1451 - Long line 129
  27.410 +
  27.411 +    PG texts should have lines shorter than 76. There may be occasions
  27.412 +    where you decide that you really have to go out to 79 characters,
  27.413 +    but the sample above says that line 1451 is 129 characters long -
  27.414 +    probably two lines run together.
  27.415 +
  27.416 +
  27.417 +
  27.418 +    Line 1590 - Short line?
  27.419 +
  27.420 +    PG texts should have lines longer than 54 characters. However,
  27.421 +    there are special cases like poetry and tables of contents where
  27.422 +    the lines _should_ be shorter. So treat Gutcheck warnings about
  27.423 +    short lines carefully. Sometimes it's a genuine formatting
  27.424 +    problem; sometimes the line really needs to be short.
  27.425 +
  27.426 +    Hint: gutcheck will not flag lines as short if they are indented
  27.427 +    - if they start with a space. I like to start inserted stanzas
  27.428 +    and other such items indented with a couple of spaces so that 
  27.429 +    they stand out from the main text anyway.
  27.430 +
  27.431 +
  27.432 +
  27.433 +    Line 1804 - Begins with punctuation?
  27.434 +
  27.435 +    Lines should normally not begin with commas, periods and so on.
  27.436 +    An exception is ellipses . . . which can happen at start of line.
  27.437 +
  27.438 +
  27.439 +
  27.440 +    Line 1850 - Spaced em-dash?
  27.441 +
  27.442 +    The PG standard for an em-dash--like these--is two minus signs
  27.443 +    with no spaces before or after them. Gutcheck flags non-PG
  27.444 +    em-dashes - like this one. Normally, you will replace it with a 
  27.445 +    PG-standard em-dash.
  27.446 +
  27.447 +
  27.448 +
  27.449 +    Line 1904 - Query he/be error?
  27.450 +
  27.451 +    Gutcheck makes a very minor effort to look for that scourge of all
  27.452 +    proofreaders, "be" replacing "he" or vice-versa, and draws your
  27.453 +    attention to it when it thinks it has found one.
  27.454 +
  27.455 +
  27.456 +
  27.457 +    Line 2017 - Query digit in a1most
  27.458 +
  27.459 +    The digit 1 is commonly OCRed for the letter l, the digit 0 for
  27.460 +    the letter O, and so on. When gutcheck sees a mix of digits and
  27.461 +    letters, it warns you. It may generate a false positive for
  27.462 +    something like 7am.
  27.463 +
  27.464 +
  27.465 +
  27.466 +    Line 2083 - Query standalone 0
  27.467 +
  27.468 +    In paranoid mode (see -x) only, gutcheck warns about the digit 0 
  27.469 +    and the number 1 standing alone as a word. This can happen if the 
  27.470 +    OCR misreads the words O or I.
  27.471 +
  27.472 +
  27.473 +
  27.474 +    Line 2115 - Query word whetber
  27.475 +
  27.476 +    If you have switched typo-checking on, gutcheck looks for
  27.477 +    potential typos, especially common h/b errors. It's not
  27.478 +    infallible; it sometimes queries legit words, but it's
  27.479 +    always worth taking a look.
  27.480 +
  27.481 +
  27.482 +
  27.483 +    Line 2190 column 14 - Missing space?
  27.484 +
  27.485 +    Omitting a space is a very common error,especially coming from
  27.486 +    OCRed text,and can be hard for a human to spot. The commas in
  27.487 +    the previous sentence illustrate the kind of thing I mean.
  27.488 +
  27.489 +
  27.490 +
  27.491 +    Line 2240 column 48 - Spaced punctuation?
  27.492 +
  27.493 +    The flip side of the "missing space" error , here , is when extra
  27.494 +    spaces are added before punctuation . Some old texts appear to add
  27.495 +    extra spaces around punctuation consistently, but this was a
  27.496 +    typographical convention rather than the author's intent, and the
  27.497 +    extra "spaces" should be removed when preparing a PG text.
  27.498 +
  27.499 +
  27.500 +
  27.501 +    Line 2301 column 19 - Unspaced quotes?
  27.502 +
  27.503 +    Another common spacing problem occurs in a phrase like "You wait
  27.504 +    there,"he said.
  27.505 +
  27.506 +
  27.507 +
  27.508 +    Line 2385 column 27 - Wrongspaced quotes?
  27.509 +
  27.510 +    As of version 0.98, gutcheck adds extra checks on whether a quote
  27.511 +    seems to be a start or end quote, and queries those that appear to
  27.512 +    be misplaced. This does give rise to false positives when quotes are
  27.513 +    nested, for example:
  27.514 +
  27.515 +    "And how," she asked, "will your "friends" help you now?"
  27.516 +
  27.517 +    but these false positives are worth it because of the many cases
  27.518 +    that this test catches, notably those like:
  27.519 +
  27.520 +    "And how, "she said," will your friends help you now?"
  27.521 +
  27.522 +    Sometimes a "wrongspaced quotes" query will arise because an earlier
  27.523 +    quote in the paragraph was omitted, so if the place specified seems
  27.524 +    to be OK, look back to see whether there's a problem in the preceding
  27.525 +    lines.
  27.526 +
  27.527 +
  27.528 +
  27.529 +    Line 2400 - HTML Tag? <PRE>
  27.530 +
  27.531 +    Some PG texts have been converted from HTML, and not all of the
  27.532 +    HTML tags have been removed.
  27.533 +
  27.534 +
  27.535 +
  27.536 +    Line 2402 - HTML symbol? &emdash;
  27.537 +
  27.538 +    Similarly, special HTML symbol characters can survive into PG
  27.539 +    texts. Can occasionally produce amusing false positives like
  27.540 +    . . . Marwick & Co were well known for it;
  27.541 +
  27.542 +
  27.543 +
  27.544 +    Line 2540 - Mismatched quotes
  27.545 +
  27.546 +    Another gutcheck mainstay - unclosed doublequotes in a paragraph.
  27.547 +    See the discussion of quotes in the switches section near the
  27.548 +    start of this file.
  27.549 +    
  27.550 +    Since the mismatch doesn't occur on any one line, gutcheck quotes
  27.551 +    the line number of the first blank line following the paragraph,
  27.552 +    since this is the point where it reconciles the count of quotes.
  27.553 +    However, if gutcheck is echoing lines, that is, you haven't used
  27.554 +    the -e switch, it will show the _first_ line of the paragraph, 
  27.555 +    to help you find the place without using line numbers. The 
  27.556 +    offending paragraph is therefore between the quoted line and 
  27.557 +    the line number given.
  27.558 +
  27.559 +
  27.560 +
  27.561 +    Line 2587 - Mismatched single quotes
  27.562 +
  27.563 +    Only checked with the -s switch, since checking single quotes is 
  27.564 +    not a very reliable process. Otherwise, the same logic as for 
  27.565 +    doublequotes applies.
  27.566 +
  27.567 +
  27.568 +
  27.569 +    Line 2877 - Mismatched round brackets?
  27.570 +
  27.571 +    Also curly and square brackets. Texts with a lot of brackets, like
  27.572 +    plays with bracketed stage instructions, may have mismatches.
  27.573 +
  27.574 +
  27.575 +    Line 3150 - No CR?
  27.576 +    Line 3204 - Two successive CRs?
  27.577 +    Line 3281 position 75 - CR without LF?
  27.578 +
  27.579 +    These are the invalid line-end warnings. See the discussion of
  27.580 +    line-end checking in the switches section near the start of this
  27.581 +    file. If you see these, and your editor doesn't show anything
  27.582 +    wrong, you should probably try deleting the characters just before
  27.583 +    and after the line end, and the line-end itself, then retyping the
  27.584 +    characters and the line-end.
  27.585 +
  27.586 +
  27.587 +    Line 2940 - Paragraph starts with lower-case
  27.588 +
  27.589 +    A common error in an e-text is for an extra blank line
  27.590 +
  27.591 +    to be put in, like the blank line above, and this often
  27.592 +    shows up as a new paragraph beginning with lower case.
  27.593 +    Sometimes the blank line is deliberate, as when a 
  27.594 +    quotation is inserted in a speech. Use your judgement.
  27.595 +
  27.596 +
  27.597 +    Line 2987 - Extra period?
  27.598 +
  27.599 +    An extra period. is a. common problem in OCRed text. and usually
  27.600 +    arises when a speck of dust on the page is mistaken for a period.
  27.601 +    or. as occasionally happens. when a comma loses its tail.
  27.602 +
  27.603 +
  27.604 +    Line 3012 column 12 - Double punctuation?
  27.605 +
  27.606 +    Double punctuation., like that,, is a common typo and
  27.607 +    scanno. Some books have much legit double punctuation,
  27.608 +    like etc., etc., but it's worth checking anyway.
  27.609 +
  27.610 +
  27.611 +
  27.612 +            *       *       *        *
  27.613 +
  27.614 +For Windows-only users who are unfamiliar with DOS:
  27.615 +
  27.616 +    If you're a Windows-only user, you need to save
  27.617 +    gutcheck.exe into the folder (directory) where the
  27.618 +    text file you want to check is. Let's say your
  27.619 +    text file is in C:\GUT, then you should save
  27.620 +    GUTCHECK.EXE into C:\GUT.
  27.621 +
  27.622 +    Now get to a DOS prompt. You can do this by
  27.623 +    selecting the "Command Prompt" or "MS-DOS Prompt"
  27.624 +    option that will be somewhere on your
  27.625 +    Start/Programs menu.
  27.626 +
  27.627 +    Now get into the C:\GUT directory. 
  27.628 +    You can do this using the CD (change directory) 
  27.629 +    command, like this:
  27.630 +        CD \GUT
  27.631 +    and your prompt will change to 
  27.632 +        C:\GUT>
  27.633 +    so you know you're in the right place.
  27.634 +
  27.635 +    Now type
  27.636 +        gutcheck yourfile.txt
  27.637 +    and you'll see gutcheck's report.
  27.638 +
  27.639 +    By default, gutcheck prints its queries to screen.
  27.640 +    If you want to create a file of them, to edit
  27.641 +    against the text, you can use the greater-than
  27.642 +    sign (>) to tell it to output the report to a
  27.643 +    file. For example, if you want its report in a
  27.644 +    file called QUERIES.LST, you could type
  27.645 +    
  27.646 +        gutcheck yourfile.txt > queries.lst
  27.647 +
  27.648 +    The queries.lst file will then contain the listing
  27.649 +    of possible formatting errors, and you can
  27.650 +    edit it alongside your text.
  27.651 +
  27.652 +    Whatever you do, DON'T make the filename after
  27.653 +    the greater-than sign the name of a file already
  27.654 +    on your disk that you want to keep, because
  27.655 +    the greater-than sign will cause gutcheck to
  27.656 +    replace any existing file of that name.
  27.657 +
  27.658 +    So, for example, if you have two Tolstoy files
  27.659 +    that you want to check, called WARPEACE.TXT and 
  27.660 +    ANNAK.TXT, make sure that neither of these names
  27.661 +    is ever used following the greater-than sign.
  27.662 +    To check these correctly, you might do:
  27.663 +
  27.664 +    gutcheck warpeace.txt >war.lst
  27.665 +
  27.666 +    and
  27.667 +
  27.668 +    gutcheck annak.txt > annak.lst
  27.669 +
  27.670 +    separately. Then you can look at war.lst and annak.lst
  27.671 +    to see the gutcheck reports.
  27.672 +
  27.673 +            *       *       *        *
  27.674 +
  27.675 +
  27.676 +For existing 0.98 users upgrading to 0.99:
  27.677 +
  27.678 +    If you run on old 16-bit DOS or Windows 3.x, I'm afraid
  27.679 +    you're out of luck. I'm not saying it _can't_ be compiled
  27.680 +    to run on 16-bit, but the executable with the package is
  27.681 +    for Win32 only. *nix users won't notice the change at all.
  27.682 +
  27.683 +
  27.684 +    There are two new switches: -u and -d. 
  27.685 +          See above for full rundown.
  27.686 +
  27.687 +
  27.688 +Here's a list of the new errors:
  27.689 +
  27.690 +    Line 1456 - Carat character?
  27.691 +
  27.692 +    I^ve found a few.
  27.693 +
  27.694 +
  27.695 +    Line 1821 - Forward slash?
  27.696 +
  27.697 +    Common error for italicized "I", or so /'ve found.
  27.698 +
  27.699 +
  27.700 +    Line 2139 - Query missing paragraph break?
  27.701 +
  27.702 +    "Come here, son." "Do I _have_ to go, dad?"
  27.703 +    Like that. False positives in some texts. Sorry 'bout that,
  27.704 +    but these are often errors.
  27.705 +
  27.706 +
  27.707 +    Line 2200 - Query had/bad error?
  27.708 +
  27.709 +    Clear enough. Doesn't catch as many as I'd like it to,
  27.710 +    but rarely gives false alarms.
  27.711 +
  27.712 +
  27.713 +    Line 2268 - Query punctuation after the?
  27.714 +
  27.715 +    Some words, like "the", very rarely have punctuation
  27.716 +    following them. Others, like "Mrs", usually have a
  27.717 +    period, but never a comma. Occasional false positives.
  27.718 +
  27.719 +
  27.720 +    Line 2380 - Query possible scanno arid
  27.721 +
  27.722 +    It found one of your user-defined typos when you
  27.723 +    used the -u switch.
  27.724 +
  27.725 +
  27.726 +    Line 2511 - Capital "S"?
  27.727 +
  27.728 +    Surprisingly common specific case, like: Jane'S 
  27.729 +
  27.730 +    
  27.731 +    Line 3469 - endquote missing punctuation?
  27.732 +
  27.733 +    OK. This one can really cause a lot of false positives
  27.734 +    in some books, but it switches itself off if it finds
  27.735 +    more than 20 in a text, unless you force it to list them
  27.736 +    all with the -v switch.
  27.737 +    "Hey, dad" Johnny said, "can we go now?"
  27.738 +    is a common punctuation-missing error.
  27.739 +
  27.740 +
  27.741 +    Line 4266 - Mismatched underscores?
  27.742 +
  27.743 +    Like mismatched anything else!
  27.744 +
  27.745 +

    28.1 --- a/doc/gc-test.txt	Fri Jan 27 00:28:11 2012 +0000
    28.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.3 @@ -1,64 +0,0 @@
    28.4 -                            gutcheck test framework
    28.5 -                            =======================
    28.6 -
    28.7 -Running existing testcases
    28.8 ---------------------------
    28.9 -
   28.10 -The test harness (the program that runs a test) is called gc-test. The various
   28.11 -testcases are stored in multiple text files, typically with a .tst extension.
   28.12 -
   28.13 -To run a testcase when all of gutcheck, gc-test and the testcase file are
   28.14 -in the current directory simply do something like:
   28.15 -
   28.16 -% gc-test missing-space.tst
   28.17 -
   28.18 -from a command prompt. Under MS-Windows, this is called a command window and
   28.19 -the prompt will normally look slightly different, e.g.,
   28.20 -
   28.21 -C:\DP> gc-test missing-space.tst
   28.22 -
   28.23 -To run all the tests in the current directory, do something like this:
   28.24 -
   28.25 -% gc-test *.tst
   28.26 -
   28.27 -If gutcheck is not in the current directory, then you can set an environment
   28.28 -variable (GUTCHECK) to point at it. For example, on MS-Windows you might do:
   28.29 -
   28.30 -C:\DP> set GUTCHECK=C:\GUTCHECK\GUTCHECK.EXE
   28.31 -C:\DP> gc-test *.tst
   28.32 -
   28.33 -Writing your own testcases
   28.34 ---------------------------
   28.35 -
   28.36 -Writing a new testcase is pretty painless. Most testcases follow this simple
   28.37 -pattern:
   28.38 -
   28.39 -		┌──────────────────────────────────────────┐
   28.40 -		│**************** INPUT ****************   │
   28.41 -		│"Look!John, over there!"                  │
   28.42 -		│**************** EXPECTED ****************│
   28.43 -		│                                          │
   28.44 -		│"Look!John, over there!"                  │
   28.45 -		│    Line 1 column 6 - Missing space?      │
   28.46 -		└──────────────────────────────────────────┘
   28.47 -
   28.48 -The sixteen asterisks in this example form what is known as the "flag". This
   28.49 -flag must come before and after all tags (e.g., INPUT and EXPECTED). In the
   28.50 -unlikely event that you need sixteen asterisks at the start of a line of text,
   28.51 -then simply choose a different flag and use it throughout the file (flags
   28.52 -can be any sequence of ASCII characters except control codes and space).
   28.53 -
   28.54 -Note that the header that gutcheck normally outputs is not included in the
   28.55 -expected output. This avoids problems with not knowing beforehand the name
   28.56 -of the file that gutcheck will be asked to look at (and saves typing!).
   28.57 -gutcheck prints a blank line before each warning. These are not part of the
   28.58 -header and so do need to be included.
   28.59 -
   28.60 -To test that gutcheck produces no output, you still need to include
   28.61 -an EXPECTED tag, just with no text following it. If there is no EXPECTED
   28.62 -tag, then gc-test will consider that no expectation exists and won't check
   28.63 -the output at all.
   28.64 -
   28.65 -There is no support yet for non-ASCII testcases, embedded linefeeds,
   28.66 -passing command line options to gutcheck or for testcases which are
   28.67 -expected to fail.

    29.1 --- a/doc/gutcheck.txt	Fri Jan 27 00:28:11 2012 +0000
    29.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.3 @@ -1,742 +0,0 @@
    29.4 -
    29.5 -
    29.6 -                            Gutcheck documentation
    29.7 -
    29.8 -
    29.9 -gutcheck:  lists possible common formatting errors in a Project
   29.10 -Gutenberg candidate file. It is a command line program and can be used
   29.11 -under Win32 or Unix (gutcheck.c should compile anywhere; if it doesn't,
   29.12 -tell me). For Windows-only people, there is an appendix at the end
   29.13 -with brief instructions for running it.
   29.14 -
   29.15 -
   29.16 -Current version: 0.99. Users of 0.98 see end of file for changes.
   29.17 -
   29.18 -You should also have received the licence file COPYING, a README file, 
   29.19 -gutcheck.c, the source code, and gutcheck.exe, a DOS executable, with
   29.20 -this file.
   29.21 -
   29.22 -This software is Copyright Jim Tinsley 2000-2005.
   29.23 -
   29.24 -Gutcheck comes with ABSOLUTELY NO WARRANTY. For details, read the file COPYING.
   29.25 -This is Free Software; you may redistribute it under certain conditions (GPL).
   29.26 -
   29.27 -See http://gutcheck.sourceforge.net for the latest version.
   29.28 -
   29.29 -
   29.30 -Usage is: gutcheck [-setopxlywm] filename
   29.31 -      where:
   29.32 -      -s checks Single quotes 
   29.33 -      -e switches off Echoing of lines 
   29.34 -      -t checks Typos
   29.35 -      -o produces an Overview only
   29.36 -      -p sets strict quotes checking for Paragraphs
   29.37 -      -x (paranoid) switches OFF typo checking and extra checks
   29.38 -      -l turns off Line-end checks
   29.39 -      -y sets error messages to stdout
   29.40 -      -w is a special mode for web uploads (for future use)
   29.41 -      -v (verbose) forces individual reporting of minor problems
   29.42 -      -m interprets Markup of some common HTML tags and entities    
   29.43 -      -u warns about words in a user-defined typo file gutcheck.typ 
   29.44 -      -d ignores some DP-specific markup
   29.45 -
   29.46 -Running gutcheck without any parameters will display a brief help message.
   29.47 -
   29.48 -Sample usage: 
   29.49 -
   29.50 -    gutcheck warpeace.txt
   29.51 -
   29.52 -
   29.53 -More detail:
   29.54 -
   29.55 -    Echoing lines (-e to switch off)
   29.56 -
   29.57 -      You may find it convenient, when reviewing Gutcheck's 
   29.58 -      suggestions, to see the line that Gutcheck is questioning.
   29.59 -      That way, you can often see at a glance whether it is
   29.60 -      a real error that needs to be fixed, or a false positive
   29.61 -      that should be in the text, but Gutcheck's limited
   29.62 -      programming doesn't understand.
   29.63 -
   29.64 -      By default, gutcheck echoes these lines, but if you don't 
   29.65 -      want to see the lines referred to, -e will switch it OFF.
   29.66 -
   29.67 -
   29.68 -    Quotes (-s and -p switches)
   29.69 -
   29.70 -      Gutcheck always looks for unbalanced doublequotes in a 
   29.71 -      paragraph. It is a common convention for writers not to
   29.72 -      close quotes in a paragraph if the next paragraph opens
   29.73 -      with quotes and is a continuation by the same speaker.
   29.74 -
   29.75 -      Gutcheck therefore does not normally report unclosed quotes 
   29.76 -      if the next paragraph begins with a quote. If you need
   29.77 -      to see all unclosed quotes, even where the next paragraph
   29.78 -      begins with a quote, you should use the -p switch.
   29.79 -
   29.80 -      Singlequotes (') are a problem, since the same character
   29.81 -      is used for an apostrophe. I'm not sure that it is 
   29.82 -      possible to get 100% accuracy on singlequotes checking,
   29.83 -      particularly since dialect, quite common in PG texts,
   29.84 -      upsets the normal rules so badly. Consider the sentence:
   29.85 -        'Tis often said that a man's a man for a' that.
   29.86 -      As humans, we recognize that both apostrophes are used
   29.87 -      for contractions rather than quotes, but it isn't easy 
   29.88 -      to get a program to recognize that.
   29.89 -
   29.90 -      Since Gutcheck makes too many mistakes when trying to match
   29.91 -      singlequotes, it doesn't look for unbalanced singlequotes
   29.92 -      unless you specify the -s switch.
   29.93 -
   29.94 -      Consider these sentences, which illustrate the main cases:
   29.95 -
   29.96 -        'Tis often said that a fool and his money are soon parted.
   29.97 -
   29.98 -        'Becky's goin' home,' said Tom.
   29.99 -
  29.100 -        The dogs' tails wagged in unison.
  29.101 -
  29.102 -        Those 'pack dogs' of yours look more like wolves.
  29.103 -
  29.104 -
  29.105 -
  29.106 -    Typos (-t switch)
  29.107 -
  29.108 -      It's not Gutcheck's job to be a spelling checker, but it
  29.109 -      does check for a list of common typos and OCR errors if you
  29.110 -      use the -t switch. (The -x switch turns typo checking off.)
  29.111 -
  29.112 -      It also checks for character combinations, especially involving
  29.113 -      h and b, which are often confused by OCR, that rarely or never
  29.114 -      occur. For example, it queries "tbe" in a word. Now, "the" often
  29.115 -      occurs, but "tbe" is very rare (heartbeat, hotbed), so I'm
  29.116 -      playing the odds - a few false positives for many errors found.
  29.117 -      Similarly with "ii", which is a very common OCR error.
  29.118 -
  29.119 -      Gutcheck suppresses multiple reporting of the first 40 "typos"
  29.120 -      found. This is to remove the annoyance of seeing something like
  29.121 -      "FN" (footnote) or "LK" (initials) flagged as a typo 147 times
  29.122 -      in a text. 
  29.123 -
  29.124 -
  29.125 -    Line-end checking (-l switch to disable)
  29.126 -
  29.127 -      All PG texts should have a Carriage Return (CR - character 13)
  29.128 -      and a Line Feed (LF - character 10) at end of each line,
  29.129 -      regardless of what O/S you made them on. DOS/Windows, Unix
  29.130 -      and Mac have different conventions, but the final text should
  29.131 -      always use a CR/LF pair as its line terminator.
  29.132 -
  29.133 -      By default, Gutcheck verifies that every line does have
  29.134 -      the correct terminator, but if you're on a work-in-progress
  29.135 -      in Linux, you might want to convert the line-ends as a final
  29.136 -      step, and not want to see thousands of errors every time you
  29.137 -      run Gutcheck before that final step, so you can turn off 
  29.138 -      this checking with the -l switch.
  29.139 -
  29.140 -
  29.141 -    Paranoid mode (-x switch to disable: Trust No One :-)
  29.142 -
  29.143 -      -x switches OFF typo-checking, the -t flag, automatically
  29.144 -      and some extra checks like standalone 1 and 0 queries.
  29.145 -
  29.146 -
  29.147 -    Overview mode (-o switch)
  29.148 -
  29.149 -       This mode just gives a count of queries found
  29.150 -       instead of a detailed list.
  29.151 -
  29.152 -
  29.153 -    Header quote  (-h switch)
  29.154 -
  29.155 -       If you use the -h switch, gutcheck will also display
  29.156 -       the Title, Author, Release and Edition fields from the
  29.157 -       PG header. This is useful mostly for the automated
  29.158 -       checks we do on recently-posted texts.
  29.159 -
  29.160 -
  29.161 -    Errors to stdout (-y switch)
  29.162 -
  29.163 -       If you're just running gutcheck normally, you can ignore
  29.164 -       this. It's only there for programs that provide a front
  29.165 -       end to gutcheck. It makes error messages appear within
  29.166 -       the output of gutcheck so that the front end knows whether
  29.167 -       gutcheck ran OK.
  29.168 -
  29.169 -
  29.170 -    Verbose reporting (-v switch)
  29.171 -
  29.172 -       Normally, if gutcheck sees lots of long lines, short lines,
  29.173 -       spaced dashes, non-ASCII characters or dot-commas ".," it
  29.174 -       assumes these are features of the text, counts and summarizes
  29.175 -       them at the top of its report, but does not list them 
  29.176 -       individually. If the -v switch is on, gutcheck will list them all.
  29.177 -
  29.178 -
  29.179 -    Markup interpretation (-m switch)
  29.180 -
  29.181 -       Normally, gutcheck flags anything it suspects of being HTML
  29.182 -       markup as a possible error. When you use the -m switch,
  29.183 -       however, it matches anything that looks like markup against
  29.184 -       a short list of common HTML tags and entities. If the markup
  29.185 -       is in that list, it either ignores the markup, in the case
  29.186 -       of a tag, or "interprets" the markup as its nearest ASCII 
  29.187 -       equivalent, in the case of an entity. So, for example, using
  29.188 -       this switch, gutcheck will "see"
  29.189 -
  29.190 -       &ldquo;He went <i>thataway!</i>&rdquo;
  29.191 -
  29.192 -       as
  29.193 -
  29.194 -       "He went thataway!"
  29.195 -
  29.196 -       and report accordingly.
  29.197 -
  29.198 -       This switch does not, not, NOT check the validity of HTML;
  29.199 -       it exists so that you can run gutcheck on most HTML texts
  29.200 -       for PG, and get sane results. It does not support all tags.
  29.201 -       It does not support all entities. When it sees a tag or entity
  29.202 -       it does not recognize, it will query it as HTML just as if
  29.203 -       you hadn't specified the -m switch.
  29.204 -
  29.205 -       Gutcheck 0.99 will automatically switch on markup interpretation
  29.206 -       if it sees a lot of tags that appear to be markup, so mostly, you
  29.207 -       won't have to specify this.
  29.208 -
  29.209 -    User-defined typos (-u switch)
  29.210 -
  29.211 -        If you have a file named gutcheck.typ either in your current
  29.212 -        working directory or in the directory from which you explicitly
  29.213 -        invoked gutcheck, but not necessarily on your path, and if you
  29.214 -        specify the -u switch, gutcheck will query any word specified 
  29.215 -        in that file. The file is simple: one word, in lower case, per
  29.216 -        line. 999 lines are allowed for. Be careful not to put multiple
  29.217 -        words onto a line, or leave any rubbish other than the word on
  29.218 -        the line. You should have received a sample file gutcheck.typ
  29.219 -        with this package.
  29.220 -
  29.221 -    Ignore DP markup (-d switch)
  29.222 -        
  29.223 -        Distributed Proofreaders (http://www.pgdp.net) is currently
  29.224 -        (2005) the main source of PG texts, and proofers there use
  29.225 -        special conventions. This switch understands those conventions,
  29.226 -        so that people can use gutcheck on files in process that still
  29.227 -        haven't had the special conventions removed yet. The special
  29.228 -        conventions supported in 0.99 are page-separators and
  29.229 -        "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/".
  29.230 -
  29.231 -
  29.232 -You will probably only run gutcheck on a text once or maybe twice,
  29.233 -just prior to uploading; it usually finds a few formatting problems;
  29.234 -it also usually finds queries that aren't problems at all - it often
  29.235 -questions Tables of Contents for having short lines, for example.
  29.236 -These are called "false positives", and need a human to decide on
  29.237 -them.
  29.238 -
  29.239 -The text should be standard prose, and already close to PG normal
  29.240 -format (plain text, about 70 characters per line with blank lines
  29.241 -between paragraphs).
  29.242 -
  29.243 -Gutcheck merely draws your attention to things that might be errors.
  29.244 -It is NOT a substitute for human judgement. Formatting choices like
  29.245 -short lines may be for a reason that this program can't understand.
  29.246 -
  29.247 -Even the most careful human proofing can leave errors behind in a
  29.248 -text, and there are several automated checks you can do to help find
  29.249 -them. Of these, spellchecking (with _very_ careful human judgement) is
  29.250 -the most important and most useful.
  29.251 -
  29.252 -Gutcheck does perform some basic typo-checking if you ask it to,
  29.253 -but its focus is on formatting errors specific to PG texts - 
  29.254 -mismatched quotes, non-ASCII characters, bad spacing, bad line
  29.255 -length, HTML tags perhaps left from a conversion, unbalanced
  29.256 -brackets.
  29.257 -
  29.258 -Suggestions for additional checks would be appreciated and duly 
  29.259 -considered, but no guarantees that they will be implemented.
  29.260 -
  29.261 -
  29.262 -
  29.263 -
  29.264 -                How do _I_ use it?
  29.265 -
  29.266 -Practically everyone I give gutcheck to asks me how _I_ use it.
  29.267 -Well, when I get a text for posting, say filename.txt, I run
  29.268 -
  29.269 -    gutcheck -o filename.txt
  29.270 -
  29.271 -That gives me a quick idea what I'm dealing with. It'll tell
  29.272 -me what kind of problems gutcheck sees, and give me an idea 
  29.273 -of how much more work needs to be done on the text. Keep in 
  29.274 -mind that gutcheck doesn't do anything like a full spellcheck,
  29.275 -but when I see a text that has a lot of problems, I assume that
  29.276 -it probably needs a spellcheck too.
  29.277 -
  29.278 -Having got a feel for the ballpark, I run
  29.279 -
  29.280 -    gutcheck filename.txt > jj
  29.281 -
  29.282 -where jj is my personal, all-purpose filename for temporary data
  29.283 -that doesn't need to be kept. Then I open filename.txt and jj in
  29.284 -a split-screen view in my editor, and work down the text, fixing
  29.285 -whatever needs fixing, and skipping whatever doesn't. If your 
  29.286 -editor doesn't split-screen, you can get much the same effect by 
  29.287 -opening your original file in your normal editor, and jj (or your
  29.288 -equivalent name) in something like Notepad, keeping both in view 
  29.289 -at the same time.
  29.290 -
  29.291 -Twice a day, an automatic process looks at all recently-posted
  29.292 -texts, and emails Michael, me, and sometimes other people with
  29.293 -their gutcheck summaries.
  29.294 -
  29.295 -
  29.296 -
  29.297 -        Future development of gutcheck
  29.298 -
  29.299 -Gutcheck has gone about as far as it can, given its current
  29.300 -structure. In order to add better singlequotes checking,
  29.301 -sentence checking, better he/be checking and other good stuff
  29.302 -that I'd like to see, I'll have to rewrite it from a different
  29.303 -angle - looking at the syntax instead of the lines. And I'll
  29.304 -probably get around to that sooner or later.
  29.305 -
  29.306 -Meantime, I'm just trying to get this version stabilized, so
  29.307 -please report any bugs you find. When it is stable, I'll run
  29.308 -up a Windows port for those timid souls who can't look a 
  29.309 -command line in the eye. :-)
  29.310 -
  29.311 -And I've started work on gutspell, a companion to gutcheck
  29.312 -which will concentrate on spelling problems. PG spelling
  29.313 -problems are unusual, since the range of texts we cover is
  29.314 -so wide, and I'll be taking a somewhat unorthodox approach
  29.315 -to writing this spelling-checker _specifically_ for texts
  29.316 -containing a lot of dialect and uncommon words that have
  29.317 -probably already been spell-checked against a standard
  29.318 -modern dictionary.
  29.319 -
  29.320 -
  29.321 -
  29.322 -
  29.323 -Explanations of common gutcheck messages:
  29.324 -
  29.325 -    --> 74 lines in this file have white space at end
  29.326 -
  29.327 -    PG texts shouldn't have extra white space added at end of line.
  29.328 -    Don't worry too much about this; they're not doing any harm,
  29.329 -    and they'll be removed during posting anyway.
  29.330 -
  29.331 -
  29.332 -    --> 348 lines in this file are short. Not reporting short lines.
  29.333 -    --> 84 lines in this file are long. Not reporting long lines.
  29.334 -    --> 8 lines in this file are VERY long!
  29.335 -
  29.336 -    If there are a lot of long or short lines, Gutcheck won't list
  29.337 -    them individually. The short lines version of this message
  29.338 -    is commonly seen when gutchecking poetry and some plays, where
  29.339 -    the normal line length is shorter than the standard for prose.
  29.340 -    A "VERY long" line is one over 80 characters.  You normally
  29.341 -    shouldn't have any of these, but sometimes you may have to render
  29.342 -    a table that must be that long, or some special preformatted
  29.343 -    quotation that can't be broken.
  29.344 -
  29.345 -
  29.346 -    --> There are 75 spaced dashes and em-dashes in this file. Not reporting them.
  29.347 -
  29.348 -    The PG standard for an emdash--like these--is two minus signs
  29.349 -    with no spaces before or after them. However, some older texts
  29.350 -    used spaced dashes - like these -- and if there are very many
  29.351 -    such spaced dashes in the file, gutcheck just draws your
  29.352 -    attention to it and doesn't list them individually.
  29.353 -
  29.354 -
  29.355 -
  29.356 -    Line 3020 - Non-ASCII character 233
  29.357 -
  29.358 -    Standard PG texts should use only ASCII characters with values
  29.359 -    up to 127; however, non-English, accented characters can be 
  29.360 -    represented according to several different non-ASCII encoding 
  29.361 -    schemes, using values over 127. If you have a plain English text
  29.362 -    with a few accented characters in words like cafe or tete-a-tete,
  29.363 -    you should replace the accented characters with their unaccented 
  29.364 -    versions. The English pound sign is another commonly-seen
  29.365 -    non-ASCII character. If you have enough non-ASCII characters in
  29.366 -    your text that you feel removing them would degrade your text
  29.367 -    unacceptably, you should probably consider doing an 8-bit text
  29.368 -    as well as a plain-ASCII version.
  29.369 -
  29.370 -
  29.371 -
  29.372 -    Line 1207 - Non-ISO-8859 character 156
  29.373 -
  29.374 -    Even in "8-bit" texts, there are distinctions between code sets.
  29.375 -    The ISO-8859 family of 8-bit code sets is the most commonly used
  29.376 -    in PG, and these sets do not define values in the range 128 through
  29.377 -    159 as printable characters. It's quite common for someone on a
  29.378 -    Windows or Mac machine to use a non-ISO character inadvertently,
  29.379 -    so this message warns that the character is not only not ASCII,
  29.380 -    but also outside the ISO-8859 range.
  29.381 -
  29.382 -
  29.383 -
  29.384 -    Line 46 - Tab character?
  29.385 -
  29.386 -    Some editors and WPs will put in Tab characters (character 9) to
  29.387 -    indicate indented text. You should not use these in a PG text,
  29.388 -    because you can't be sure how they will appear on a reader's
  29.389 -    screen. Find the Tab, and replace it with the appropriate number
  29.390 -    of spaces.
  29.391 -
  29.392 -
  29.393 -    Line 1327 - Tilde character?
  29.394 -
  29.395 -    The tilde character (~) might be legitimately used, but it's the
  29.396 -    character commonly used by OCR software to indicate a place where
  29.397 -    it couldn't make out the letter, so gutcheck flags it.
  29.398 -
  29.399 -
  29.400 -
  29.401 -    Line 1347 - Asterisk?
  29.402 -
  29.403 -    Asterisks are reported only in paranoid mode (see -x). 
  29.404 -    Like tildes, they are often used to indicate errors, but they are
  29.405 -    also legitimately used as line delimiters and footnote markers.
  29.406 -
  29.407 -
  29.408 -
  29.409 -    Line 1451 - Long line 129
  29.410 -
  29.411 -    PG texts should have lines shorter than 76. There may be occasions
  29.412 -    where you decide that you really have to go out to 79 characters,
  29.413 -    but the sample above says that line 1451 is 129 characters long -
  29.414 -    probably two lines run together.
  29.415 -
  29.416 -
  29.417 -
  29.418 -    Line 1590 - Short line?
  29.419 -
  29.420 -    PG texts should have lines longer than 54 characters. However,
  29.421 -    there are special cases like poetry and tables of contents where
  29.422 -    the lines _should_ be shorter. So treat Gutcheck warnings about
  29.423 -    short lines carefully. Sometimes it's a genuine formatting
  29.424 -    problem; sometimes the line really needs to be short.
  29.425 -
  29.426 -    Hint: gutcheck will not flag lines as short if they are indented
  29.427 -    - if they start with a space. I like to start inserted stanzas
  29.428 -    and other such items indented with a couple of spaces so that 
  29.429 -    they stand out from the main text anyway.
  29.430 -
  29.431 -
  29.432 -
  29.433 -    Line 1804 - Begins with punctuation?
  29.434 -
  29.435 -    Lines should normally not begin with commas, periods and so on.
  29.436 -    An exception is ellipses . . . which can happen at start of line.
  29.437 -
  29.438 -
  29.439 -
  29.440 -    Line 1850 - Spaced em-dash?
  29.441 -
  29.442 -    The PG standard for an em-dash--like these--is two minus signs
  29.443 -    with no spaces before or after them. Gutcheck flags non-PG
  29.444 -    em-dashes - like this one. Normally, you will replace it with a 
  29.445 -    PG-standard em-dash.
  29.446 -
  29.447 -
  29.448 -
  29.449 -    Line 1904 - Query he/be error?
  29.450 -
  29.451 -    Gutcheck makes a very minor effort to look for that scourge of all
  29.452 -    proofreaders, "be" replacing "he" or vice-versa, and draws your
  29.453 -    attention to it when it thinks it has found one.
  29.454 -
  29.455 -
  29.456 -
  29.457 -    Line 2017 - Query digit in a1most
  29.458 -
  29.459 -    The digit 1 is commonly OCRed for the letter l, the digit 0 for
  29.460 -    the letter O, and so on. When gutcheck sees a mix of digits and
  29.461 -    letters, it warns you. It may generate a false positive for
  29.462 -    something like 7am.
  29.463 -
  29.464 -
  29.465 -
  29.466 -    Line 2083 - Query standalone 0
  29.467 -
  29.468 -    In paranoid mode (see -x) only, gutcheck warns about the digit 0 
  29.469 -    and the number 1 standing alone as a word. This can happen if the 
  29.470 -    OCR misreads the words O or I.
  29.471 -
  29.472 -
  29.473 -
  29.474 -    Line 2115 - Query word whetber
  29.475 -
  29.476 -    If you have switched typo-checking on, gutcheck looks for
  29.477 -    potential typos, especially common h/b errors. It's not
  29.478 -    infallible; it sometimes queries legit words, but it's
  29.479 -    always worth taking a look.
  29.480 -
  29.481 -
  29.482 -
  29.483 -    Line 2190 column 14 - Missing space?
  29.484 -
  29.485 -    Omitting a space is a very common error,especially coming from
  29.486 -    OCRed text,and can be hard for a human to spot. The commas in
  29.487 -    the previous sentence illustrate the kind of thing I mean.
  29.488 -
  29.489 -
  29.490 -
  29.491 -    Line 2240 column 48 - Spaced punctuation?
  29.492 -
  29.493 -    The flip side of the "missing space" error , here , is when extra
  29.494 -    spaces are added before punctuation . Some old texts appear to add
  29.495 -    extra spaces around punctuation consistently, but this was a
  29.496 -    typographical convention rather than the author's intent, and the
  29.497 -    extra "spaces" should be removed when preparing a PG text.
  29.498 -
  29.499 -
  29.500 -
  29.501 -    Line 2301 column 19 - Unspaced quotes?
  29.502 -
  29.503 -    Another common spacing problem occurs in a phrase like "You wait
  29.504 -    there,"he said.
  29.505 -
  29.506 -
  29.507 -
  29.508 -    Line 2385 column 27 - Wrongspaced quotes?
  29.509 -
  29.510 -    As of version 0.98, gutcheck adds extra checks on whether a quote
  29.511 -    seems to be a start or end quote, and queries those that appear to
  29.512 -    be misplaced. This does give rise to false positives when quotes are
  29.513 -    nested, for example:
  29.514 -
  29.515 -    "And how," she asked, "will your "friends" help you now?"
  29.516 -
  29.517 -    but these false positives are worth it because of the many cases
  29.518 -    that this test catches, notably those like:
  29.519 -
  29.520 -    "And how, "she said," will your friends help you now?"
  29.521 -
  29.522 -    Sometimes a "wrongspaced quotes" query will arise because an earlier
  29.523 -    quote in the paragraph was omitted, so if the place specified seems
  29.524 -    to be OK, look back to see whether there's a problem in the preceding
  29.525 -    lines.
  29.526 -
  29.527 -
  29.528 -
  29.529 -    Line 2400 - HTML Tag? <PRE>
  29.530 -
  29.531 -    Some PG texts have been converted from HTML, and not all of the
  29.532 -    HTML tags have been removed.
  29.533 -
  29.534 -
  29.535 -
  29.536 -    Line 2402 - HTML symbol? &emdash;
  29.537 -
  29.538 -    Similarly, special HTML symbol characters can survive into PG
  29.539 -    texts. Can occasionally produce amusing false positives like
  29.540 -    . . . Marwick & Co were well known for it;
  29.541 -
  29.542 -
  29.543 -
  29.544 -    Line 2540 - Mismatched quotes
  29.545 -
  29.546 -    Another gutcheck mainstay - unclosed doublequotes in a paragraph.
  29.547 -    See the discussion of quotes in the switches section near the
  29.548 -    start of this file.
  29.549 -    
  29.550 -    Since the mismatch doesn't occur on any one line, gutcheck quotes
  29.551 -    the line number of the first blank line following the paragraph,
  29.552 -    since this is the point where it reconciles the count of quotes.
  29.553 -    However, if gutcheck is echoing lines, that is, you haven't used
  29.554 -    the -e switch, it will show the _first_ line of the paragraph, 
  29.555 -    to help you find the place without using line numbers. The 
  29.556 -    offending paragraph is therefore between the quoted line and 
  29.557 -    the line number given.
  29.558 -
  29.559 -
  29.560 -
  29.561 -    Line 2587 - Mismatched single quotes
  29.562 -
  29.563 -    Only checked with the -s switch, since checking single quotes is 
  29.564 -    not a very reliable process. Otherwise, the same logic as for 
  29.565 -    doublequotes applies.
  29.566 -
  29.567 -
  29.568 -
  29.569 -    Line 2877 - Mismatched round brackets?
  29.570 -
  29.571 -    Also curly and square brackets. Texts with a lot of brackets, like
  29.572 -    plays with bracketed stage instructions, may have mismatches.
  29.573 -
  29.574 -
  29.575 -    Line 3150 - No CR?
  29.576 -    Line 3204 - Two successive CRs?
  29.577 -    Line 3281 position 75 - CR without LF?
  29.578 -
  29.579 -    These are the invalid line-end warnings. See the discussion of
  29.580 -    line-end checking in the switches section near the start of this
  29.581 -    file. If you see these, and your editor doesn't show anything
  29.582 -    wrong, you should probably try deleting the characters just before
  29.583 -    and after the line end, and the line-end itself, then retyping the
  29.584 -    characters and the line-end.
  29.585 -
  29.586 -
  29.587 -    Line 2940 - Paragraph starts with lower-case
  29.588 -
  29.589 -    A common error in an e-text is for an extra blank line
  29.590 -
  29.591 -    to be put in, like the blank line above, and this often
  29.592 -    shows up as a new paragraph beginning with lower case.
  29.593 -    Sometimes the blank line is deliberate, as when a 
  29.594 -    quotation is inserted in a speech. Use your judgement.
  29.595 -
  29.596 -
  29.597 -    Line 2987 - Extra period?
  29.598 -
  29.599 -    An extra period. is a. common problem in OCRed text. and usually
  29.600 -    arises when a speck of dust on the page is mistaken for a period.
  29.601 -    or. as occasionally happens. when a comma loses its tail.
  29.602 -
  29.603 -
  29.604 -    Line 3012 column 12 - Double punctuation?
  29.605 -
  29.606 -    Double punctuation., like that,, is a common typo and
  29.607 -    scanno. Some books have much legit double punctuation,
  29.608 -    like etc., etc., but it's worth checking anyway.
  29.609 -
  29.610 -
  29.611 -
  29.612 -            *       *       *        *
  29.613 -
  29.614 -For Windows-only users who are unfamiliar with DOS:
  29.615 -
  29.616 -    If you're a Windows-only user, you need to save
  29.617 -    gutcheck.exe into the folder (directory) where the
  29.618 -    text file you want to check is. Let's say your
  29.619 -    text file is in C:\GUT, then you should save
  29.620 -    GUTCHECK.EXE into C:\GUT.
  29.621 -
  29.622 -    Now get to a DOS prompt. You can do this by
  29.623 -    selecting the "Command Prompt" or "MS-DOS Prompt"
  29.624 -    option that will be somewhere on your
  29.625 -    Start/Programs menu.
  29.626 -
  29.627 -    Now get into the C:\GUT directory. 
  29.628 -    You can do this using the CD (change directory) 
  29.629 -    command, like this:
  29.630 -        CD \GUT
  29.631 -    and your prompt will change to 
  29.632 -        C:\GUT>
  29.633 -    so you know you're in the right place.
  29.634 -
  29.635 -    Now type
  29.636 -        gutcheck yourfile.txt
  29.637 -    and you'll see gutcheck's report
  29.638 -
  29.639 -    By default, gutcheck prints its queries to screen.
  29.640 -    If you want to create a file of them, to edit
  29.641 -    against the text, you can use the greater-than
  29.642 -    sign (>) to tell it to output the report to a
  29.643 -    file. For example, if you want its report in a
  29.644 -    file called QUERIES.LST, you could type
  29.645 -    
  29.646 -        gutcheck yourfile.txt > queries.lst
  29.647 -
  29.648 -    The queries.lst file will then contain the listing
  29.649 -    of possible formatting errors, and you can
  29.650 -    edit it alongside your text.
  29.651 -
  29.652 -    Whatever you do, DON'T make the filename after
  29.653 -    the greater-than sign the name of a file already
  29.654 -    on your disk that you want to keep, because
  29.655 -    the greater-than sign will cause gutcheck to
  29.656 -    replace any existing file of that name.
  29.657 -
  29.658 -    So, for example, if you have two Tolstoy files
  29.659 -    that you want to check, called WARPEACE.TXT and 
  29.660 -    ANNAK.TXT, make sure that neither of these names
  29.661 -    is ever used following the greater-than sign.
  29.662 -    To check these correctly, you might do:
  29.663 -
  29.664 -    gutcheck warpeace.txt >war.lst
  29.665 -
  29.666 -    and
  29.667 -
  29.668 -    gutcheck annak.txt > annak.lst
  29.669 -
  29.670 -    separately. Then you can look at war.lst and annak.lst
  29.671 -    to see the gutcheck reports.
  29.672 -
  29.673 -            *       *       *        *
  29.674 -
  29.675 -
  29.676 -For existing 0.98 users upgrading to 0.99:
  29.677 -
  29.678 -    If you run on old 16-bit DOS or Windows 3.x, I'm afraid
  29.679 -    you're out of luck. I'm not saying it _can't_ be compiled
  29.680 -    to run on 16-bit, but the executable with the package is
  29.681 -    for Win32 only. *nix users won't notice the change at all.
  29.682 -
  29.683 -
  29.684 -    There are two new switches: -u and -d. 
  29.685 -          See above for full rundown.
  29.686 -
  29.687 -
  29.688 -Here's a list of the new errors:
  29.689 -
  29.690 -    Line 1456 - Carat character?
  29.691 -
  29.692 -    I^ve found a few.
  29.693 -
  29.694 -
  29.695 -    Line 1821 - Forward slash?
  29.696 -
  29.697 -    Common error for italicized "I", or so /'ve found.
  29.698 -
  29.699 -
  29.700 -    Line 2139 - Query missing paragraph break?
  29.701 -
  29.702 -    "Come here, son." "Do I _have_ to go, dad?"
  29.703 -    Like that. False positives in some texts. Sorry 'bout that,
  29.704 -    but these are often errors.
  29.705 -
  29.706 -
  29.707 -    Line 2200 - Query had/bad error?
  29.708 -
  29.709 -    Clear enough. Doesn't catch as many as I'd like it to,
  29.710 -    but rarely gives false alarms.
  29.711 -
  29.712 -
  29.713 -    Line 2268 - Query punctuation after the?
  29.714 -
  29.715 -    Some words, like "the", very rarely have punctuation
  29.716 -    following them. Others, like "Mrs", usually have a
  29.717 -    period, but never a comma. Occasional false positives.
  29.718 -
  29.719 -
  29.720 -    Line 2380 - Query possible scanno arid
  29.721 -
  29.722 -    It found one of your user-defined typos when you
  29.723 -    used the -u switch.
  29.724 -
  29.725 -
  29.726 -    Line 2511 - Capital "S"?
  29.727 -
  29.728 -    Surprisingly common specific case, like: Jane'S 
  29.729 -
  29.730 -    
  29.731 -    Line 3469 - endquote missing punctuation?
  29.732 -
  29.733 -    OK. This one can really cause a lot of false positives
  29.734 -    in some books, but it switches itself off if it finds
  29.735 -    more than 20 in a text, unless you force it to list them
  29.736 -    all with the -v switch.
  29.737 -    "Hey, dad" Johnny said, "can we go now?"
  29.738 -    is a common punctuation-missing error.
  29.739 -
  29.740 -
  29.741 -    Line 4266 - Mismatched underscores?
  29.742 -
  29.743 -    Like mismatched anything else!
  29.744 -
  29.745 -

    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/doc/loupe-test.txt	Fri Jan 27 10:30:16 2012 +0000
    30.3 @@ -0,0 +1,68 @@
    30.4 +                           bookloupe test framework
    30.5 +                           ========================
    30.6 +
    30.7 +Running existing testcases
    30.8 +--------------------------
    30.9 +
   30.10 +The test harness (the program that runs a test) is called loupe-test. The
   30.11 +various testcases are stored in multiple text files, typically with a .tst
   30.12 +extension.
   30.13 +
   30.14 +To run a testcase when all of bookloupe, loupe-test and the testcase file are
   30.15 +in the current directory simply do something like:
   30.16 +
   30.17 +% loupe-test missing-space.tst
   30.18 +
   30.19 +from a command prompt. Under MS-Windows, this is called a command window and
   30.20 +the prompt will normally look slightly different, eg.,
   30.21 +
   30.22 +C:\DP> loupe-test missing-space.tst
   30.23 +
   30.24 +To run all the tests in the current directory, do something like this:
   30.25 +
   30.26 +% loupe-test *.tst
   30.27 +
   30.28 +If bookloupe is not in the current directory or you want to run the testsuite
   30.29 +against gutcheck (the program that bookloupe is based on), then you can set an
   30.30 +environment variable (BOOKLOUPE) to point at it. For example, on MS-Windows
   30.31 +you might do:
   30.32 +
   30.33 +C:\DP> set BOOKLOUPE=C:\GUTCHECK\GUTCHECK.EXE
   30.34 +C:\DP> loupe-test *.tst
   30.35 +
   30.36 +Writing your own testcases
   30.37 +--------------------------
   30.38 +
   30.39 +Writing a new testcase is pretty painless. Most testcases follow this simple
   30.40 +pattern:
   30.41 +
   30.42 +		┌──────────────────────────────────────────┐
   30.43 +		│**************** INPUT ****************   │
   30.44 +		│"Look!John, over there!"                  │
   30.45 +		│**************** EXPECTED ****************│
   30.46 +		│                                          │
   30.47 +		│"Look!John, over there!"                  │
   30.48 +		│    Line 1 column 6 - Missing space?      │
   30.49 +		└──────────────────────────────────────────┘
   30.50 +
   30.51 +The sixteen asterisks in this example form what is known as the "flag". This
   30.52 +flag must come before and after all tags (eg., INPUT and EXPECTED). In the
   30.53 +unlikely event that you need sixteen asterisks at the start of a line of text,
   30.54 +then simply choose a different flag and use it throughout the file (flags
   30.55 +can be any sequence of ASCII characters except control codes and space).
   30.56 +
   30.57 +Note that the header that bookloupe and gutcheck normally output is not
   30.58 +included in the expected output. This avoids problems with not knowing
   30.59 +beforehand the name of the file that bookloupe/gutcheck will be asked to
   30.60 +look at (and saves typing!). bookloupe (and gutcheck) prints a blank line
   30.61 +before each warning. These are not part of the header and so do need to
   30.62 +be included.
   30.63 +
   30.64 +To test that bookloupe produces no output, you still need to include
   30.65 +an EXPECTED tag, just with no text following it. If there is no EXPECTED
   30.66 +tag, then loupe-test will consider that no expectation exists and won't
   30.67 +check the output at all.
   30.68 +
   30.69 +There is no support yet for non-ASCII testcases, embedded linefeeds,
   30.70 +passing command line options to bookloupe or for testcases which are
   30.71 +expected to fail.

    31.1 --- a/gclib/Makefile.am	Fri Jan 27 00:28:11 2012 +0000
    31.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.3 @@ -1,10 +0,0 @@
    31.4 -INCLUDES=-I$(top_srcdir)
    31.5 -AM_CFLAGS=$(GLIB_CFLAGS)
    31.6 -LIBS=$(GLIB_LIBS)
    31.7 -
    31.8 -noinst_LTLIBRARIES=libgc.la
    31.9 -libgc_la_SOURCES=gclib.h textfileutils.c textfileutils.h spawn.c spawn.h
   31.10 -if !HAVE_GLIB
   31.11 -libgc_la_SOURCES+=macros.h types.h fileutils.c fileutils.h mem.c mem.h \
   31.12 -  strfuncs.c strfuncs.h gcstring.c gcstring.h utils.c utils.h
   31.13 -endif

    32.1 --- a/gclib/fileutils.c	Fri Jan 27 00:28:11 2012 +0000
    32.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.3 @@ -1,46 +0,0 @@
    32.4 -#include <stdlib.h>
    32.5 -#include <stdio.h>
    32.6 -#include <gclib/macros.h>
    32.7 -#include <gclib/mem.h>
    32.8 -#include <gclib/fileutils.h>
    32.9 -#include <gclib/gcstring.h>
   32.10 -
   32.11 -/*
   32.12 - * Read a file into memory (which should be freed with mem_free when no
   32.13 - * longer required). Returns FALSE on error and outputs a suitable error
   32.14 - * message to stderr.
   32.15 - */
   32.16 -boolean file_get_contents(const char *filename,char **contents,size_t *length)
   32.17 -{
   32.18 -    FILE *fp;
   32.19 -    size_t n;
   32.20 -    char *buffer;
   32.21 -    String *string;
   32.22 -    fp=fopen(filename,"rb");
   32.23 -    if (!fp)
   32.24 -    {
   32.25 -	perror(filename);
   32.26 -	return FALSE;
   32.27 -    }
   32.28 -    buffer=mem_new(char,1024);
   32.29 -    string=string_new(NULL);
   32.30 -    do
   32.31 -    {
   32.32 -	n=fread(buffer,1,1024,fp);
   32.33 -	if (n<0)
   32.34 -	{
   32.35 -	    perror(filename);
   32.36 -	    string_free(string,TRUE);
   32.37 -	    mem_free(buffer);
   32.38 -	    free(fp);
   32.39 -	    return FALSE;
   32.40 -	}
   32.41 -	string_append_len(string,buffer,n);
   32.42 -    } while(n);
   32.43 -    mem_free(buffer);
   32.44 -    if (length)
   32.45 -	*length=string->len;
   32.46 -    *contents=string_free(string,FALSE);
   32.47 -    fclose(fp);
   32.48 -    return TRUE;
   32.49 -}

    33.1 --- a/gclib/fileutils.h	Fri Jan 27 00:28:11 2012 +0000
    33.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.3 @@ -1,8 +0,0 @@
    33.4 -#ifndef GC_FILEUTILS_H
    33.5 -#define GC_FILEUTILS_H
    33.6 -
    33.7 -#include <gclib/types.h>
    33.8 -
    33.9 -boolean file_get_contents(const char *filename,char **contents,size_t *length);
   33.10 -
   33.11 -#endif /* GC_FILEUTILS_H */

    34.1 --- a/gclib/gclib.h	Fri Jan 27 00:28:11 2012 +0000
    34.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.3 @@ -1,36 +0,0 @@
    34.4 -#if HAVE_GLIB
    34.5 -
    34.6 -#include <glib.h>
    34.7 -#define GC_DIR_SEPARATOR G_DIR_SEPARATOR
    34.8 -#define GC_DIR_SEPARATOR_S G_DIR_SEPARATOR_S
    34.9 -#define GC_IS_DIR_SEPARATOR(c) G_IS_DIR_SEPARATOR(c)
   34.10 -#define boolean gboolean
   34.11 -#define String GString
   34.12 -#define mem_new0 g_new0
   34.13 -#define mem_free g_free
   34.14 -#define str_dup g_strdup
   34.15 -#define str_ndup g_strndup
   34.16 -#define path_get_basename g_path_get_basename
   34.17 -#define file_get_contents(filename,contents,length) \
   34.18 -  g_file_get_contents(filename,contents,length,NULL)
   34.19 -#define string_new g_string_new
   34.20 -#define string_append g_string_append
   34.21 -#define string_append_len g_string_append_len
   34.22 -#define string_append_c g_string_append_c
   34.23 -#define string_free g_string_free
   34.24 -#define string_set_size g_string_set_size
   34.25 -
   34.26 -#else	/* !HAVE_GLIB */
   34.27 -
   34.28 -#include <gclib/macros.h>
   34.29 -#include <gclib/types.h>
   34.30 -#include <gclib/mem.h>
   34.31 -#include <gclib/fileutils.h>
   34.32 -#include <gclib/strfuncs.h>
   34.33 -#include <gclib/gcstring.h>
   34.34 -#include <gclib/utils.h>
   34.35 -
   34.36 -#endif	/* HAVE_GLIB */
   34.37 -
   34.38 -#include <gclib/textfileutils.h>
   34.39 -#include <gclib/spawn.h>

    35.1 --- a/gclib/gcstring.c	Fri Jan 27 00:28:11 2012 +0000
    35.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.3 @@ -1,90 +0,0 @@
    35.4 -#include <stdlib.h>
    35.5 -#include <string.h>
    35.6 -#include <gclib/gcstring.h>
    35.7 -#include <gclib/types.h>
    35.8 -#include <gclib/mem.h>
    35.9 -#include <gclib/strfuncs.h>
   35.10 -
   35.11 -/*
   35.12 - * Strings which manage their own memory
   35.13 - */
   35.14 -
   35.15 -String *string_new(const char *init)
   35.16 -{
   35.17 -    String *string=mem_new(String,1);
   35.18 -    if (!init)
   35.19 -	init="";
   35.20 -    string->len=strlen(init);
   35.21 -    string->alloc=string->len+1;
   35.22 -    string->str=str_dup(init);
   35.23 -    return string;
   35.24 -}
   35.25 -
   35.26 -/*
   35.27 - * Free a string and either return the contents (if free_segment is FALSE)
   35.28 - * or free the contents as well and return NULL (if free_segment is TRUE).
   35.29 - */
   35.30 -char *string_free(String *string,boolean free_segment)
   35.31 -{
   35.32 -    char *retval;
   35.33 -    if (free_segment)
   35.34 -    {
   35.35 -	mem_free(string->str);
   35.36 -	retval=NULL;
   35.37 -    }
   35.38 -    else
   35.39 -	retval=string->str;
   35.40 -    mem_free(string);
   35.41 -    return retval;
   35.42 -}
   35.43 -
   35.44 -/*
   35.45 - * Append a byte to string.
   35.46 - */
   35.47 -void string_append_c(String *string,char c)
   35.48 -{
   35.49 -    if (string->len+1==string->alloc)
   35.50 -    {
   35.51 -	string->alloc*=2;
   35.52 -	string->str=mem_renew(char,string->str,string->alloc);
   35.53 -    }
   35.54 -    string->str[string->len++]=c;
   35.55 -    string->str[string->len]='\0';
   35.56 -}
   35.57 -
   35.58 -/*
   35.59 - * Append len bytes from s to string. len may be passed as <0 if s is
   35.60 - * a nul-terminated string of unknown length.
   35.61 - */
   35.62 -void string_append_len(String *string,const char *s,ssize_t len)
   35.63 -{
   35.64 -    if (len<0)
   35.65 -	len=strlen(s);
   35.66 -    if (string->len+len>=string->alloc)
   35.67 -    {
   35.68 -	while (string->len+len>=string->alloc)
   35.69 -	    string->alloc*=2;
   35.70 -	string->str=mem_renew(char,string->str,string->alloc);
   35.71 -    }
   35.72 -    memcpy(string->str+string->len,s,len);
   35.73 -    string->len+=len;
   35.74 -    string->str[string->len]='\0';
   35.75 -}
   35.76 -
   35.77 -/*
   35.78 - * Sets the length of a String. If the length is less than the current length,
   35.79 - * the string will be truncated. If the length is greater than the current
   35.80 - * length, the contents of the newly added area are undefined. (However, as
   35.81 - * always, string->str[string->len] will be a nul byte.)
   35.82 - */
   35.83 -void string_set_size(String *string,size_t len)
   35.84 -{
   35.85 -    if (len>=string->alloc)
   35.86 -    {
   35.87 -	while (len>=string->alloc)
   35.88 -	    string->alloc*=2;
   35.89 -	string->str=mem_renew(char,string->str,string->alloc);
   35.90 -    }
   35.91 -    string->len=len;
   35.92 -    string->str[string->len]='\0';
   35.93 -}

    36.1 --- a/gclib/gcstring.h	Fri Jan 27 00:28:11 2012 +0000
    36.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.3 @@ -1,18 +0,0 @@
    36.4 -#ifndef GC_STRING_H
    36.5 -#define GC_STRING_H
    36.6 -
    36.7 -#include <unistd.h>
    36.8 -#include <gclib/types.h>
    36.9 -
   36.10 -typedef struct {
   36.11 -    char *str;
   36.12 -    size_t alloc,len;
   36.13 -} String;
   36.14 -
   36.15 -String *string_new(const char *init);
   36.16 -char *string_free(String *string,boolean free_segment);
   36.17 -void string_append_c(String *string,char c);
   36.18 -void string_append_len(String *string,const char *s,ssize_t len);
   36.19 -#define string_append(string,s)		string_append_len(string,s,-1)
   36.20 -
   36.21 -#endif /* GC_STRING_H */

    37.1 --- a/gclib/macros.h	Fri Jan 27 00:28:11 2012 +0000
    37.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.3 @@ -1,7 +0,0 @@
    37.4 -#ifndef FALSE
    37.5 -#define FALSE	0
    37.6 -#endif
    37.7 -
    37.8 -#ifndef TRUE
    37.9 -#define TRUE	(!FALSE)
   37.10 -#endif

    38.1 --- a/gclib/mem.c	Fri Jan 27 00:28:11 2012 +0000
    38.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.3 @@ -1,54 +0,0 @@
    38.4 -#include <stdlib.h>
    38.5 -#include <stdio.h>
    38.6 -#include <string.h>
    38.7 -#include <gclib/mem.h>
    38.8 -
    38.9 -/*
   38.10 - * A memory allocator that aborts on failure (so that the caller never
   38.11 - * needs to handle out of memory, which we assume is very unlikely to
   38.12 - * happen under normal circumstances on any modern machine).
   38.13 - */
   38.14 -void *mem_alloc(size_t nmemb,size_t size)
   38.15 -{
   38.16 -    void *ptr=malloc(nmemb*size);
   38.17 -    if (!ptr)
   38.18 -    {
   38.19 -	fprintf(stderr,
   38.20 -	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
   38.21 -	  (unsigned long)nmemb,(unsigned long)size);
   38.22 -	abort();
   38.23 -    }
   38.24 -    return ptr;
   38.25 -}
   38.26 -
   38.27 -/*
   38.28 - * As mem_new, but new memory is cleared to zero.
   38.29 - */
   38.30 -void *mem_alloc0(size_t nmemb,size_t size)
   38.31 -{
   38.32 -    void *ptr=calloc(nmemb,size);
   38.33 -    if (!ptr)
   38.34 -    {
   38.35 -	fprintf(stderr,
   38.36 -	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
   38.37 -	  (unsigned long)nmemb,(unsigned long)size);
   38.38 -	abort();
   38.39 -    }
   38.40 -    return ptr;
   38.41 -}
   38.42 -
   38.43 -/*
   38.44 - * Grow or shrink a memory block, aborting on failure.
   38.45 - */
   38.46 -void *mem_realloc(void *ptr,size_t nmemb,size_t size)
   38.47 -{
   38.48 -    ptr=realloc(ptr,nmemb*size);
   38.49 -    if (!ptr)
   38.50 -    {
   38.51 -	fprintf(stderr,
   38.52 -	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
   38.53 -	  (unsigned long)nmemb,(unsigned long)size);
   38.54 -	abort();
   38.55 -    }
   38.56 -    return ptr;
   38.57 -}

    39.1 --- a/gclib/mem.h	Fri Jan 27 00:28:11 2012 +0000
    39.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.3 @@ -1,13 +0,0 @@
    39.4 -#ifndef GC_MEM_H
    39.5 -#define GC_MEM_H
    39.6 -
    39.7 -void *mem_alloc(size_t nmemb,size_t size);
    39.8 -void *mem_alloc0(size_t nmemb,size_t size);
    39.9 -void *mem_realloc(void *ptr,size_t nmemb,size_t size);
   39.10 -
   39.11 -#define mem_new(type,n)		((type *)mem_alloc(n,sizeof(type)))
   39.12 -#define mem_new0(type,n)	((type *)mem_alloc0(n,sizeof(type)))
   39.13 -#define mem_renew(type,ptr,n)	((type *)mem_realloc(ptr,n,sizeof(type)))
   39.14 -#define mem_free(ptr)		free(ptr)
   39.15 -
   39.16 -#endif /* GC_MEM_H */

    40.1 --- a/gclib/spawn.c	Fri Jan 27 00:28:11 2012 +0000
    40.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.3 @@ -1,84 +0,0 @@
    40.4 -#include <stdlib.h>
    40.5 -#include <stdio.h>
    40.6 -#ifndef WIN32
    40.7 -#include <sys/wait.h>
    40.8 -#endif
    40.9 -#include <gclib/gclib.h>
   40.10 -
   40.11 -#define SPAWN_BUFSIZE	128
   40.12 -
   40.13 -boolean spawn_sync(char **argv,char **standard_output,int *exit_status)
   40.14 -{
   40.15 -/* Don't use g_spawn_sync on WIN32 for now to avoid needing the helper */
   40.16 -#if HAVE_GLIB && !defined(WIN32)
   40.17 -    char *standard_error;
   40.18 -    GError *error=NULL;
   40.19 -    gboolean retval;
   40.20 -    GSpawnFlags flags=G_SPAWN_SEARCH_PATH;
   40.21 -    if (!standard_output)
   40.22 -	flags=G_SPAWN_STDOUT_TO_DEV_NULL;
   40.23 -    retval=g_spawn_sync(NULL,argv,NULL,flags,NULL,NULL,standard_output,
   40.24 -      &standard_error,exit_status,&error);
   40.25 -    fputs(standard_error,stderr);
   40.26 -    g_free(standard_error);
   40.27 -    if (!retval)
   40.28 -    {
   40.29 -	fprintf(stderr,"%s\n",error->message);
   40.30 -	g_error_free(error);
   40.31 -    }
   40.32 -    else if (exit_status)
   40.33 -	*exit_status=WEXITSTATUS(*exit_status);
   40.34 -    return retval;
   40.35 -#else
   40.36 -    FILE *fp;
   40.37 -    int i,r;
   40.38 -    size_t n,len;
   40.39 -    String *command_line,*string;
   40.40 -    command_line=string_new(NULL);
   40.41 -    for(i=0;argv[i];i++)
   40.42 -    {
   40.43 -	if (i)
   40.44 -	    string_append_c(command_line,' ');
   40.45 -	string_append(command_line,argv[i]);
   40.46 -    }
   40.47 -    fp=popen(command_line->str,"r");
   40.48 -    string_free(command_line,TRUE);
   40.49 -    if (!fp)
   40.50 -    {
   40.51 -	perror(command_line->str);
   40.52 -	return FALSE;
   40.53 -    }
   40.54 -    string=string_new(NULL);
   40.55 -    do
   40.56 -    {
   40.57 -	len=string->len;
   40.58 -	string_set_size(string,len+SPAWN_BUFSIZE);
   40.59 -	n=fread(string->str+len,1,SPAWN_BUFSIZE,fp);
   40.60 -	if (n<0)
   40.61 -	{
   40.62 -	    perror("fread");
   40.63 -	    (void)pclose(fp);
   40.64 -	    string_free(string,TRUE);
   40.65 -	    return FALSE;
   40.66 -	}
   40.67 -	string_set_size(string,len+n);
   40.68 -    } while(n);
   40.69 -    r=pclose(fp);
   40.70 -    if (r<0)
   40.71 -    {
   40.72 -	perror("pclose");
   40.73 -	string_free(string,TRUE);
   40.74 -	return FALSE;
   40.75 -    }
   40.76 -    else
   40.77 -    {
   40.78 -	if (exit_status)
   40.79 -	    *exit_status=r;
   40.80 -	if (standard_output)
   40.81 -	    *standard_output=string_free(string,FALSE);
   40.82 -	else
   40.83 -	    string_free(string,TRUE);
   40.84 -	return TRUE;
   40.85 -    }
   40.86 -#endif
   40.87 -}

    41.1 --- a/gclib/spawn.h	Fri Jan 27 00:28:11 2012 +0000
    41.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.3 @@ -1,8 +0,0 @@
    41.4 -#ifndef GC_SPAWN_H
    41.5 -#define GC_SPAWN_H
    41.6 -
    41.7 -#include <gclib/gclib.h>
    41.8 -
    41.9 -boolean spawn_sync(char **argv,char **standard_output,int *exit_status);
   41.10 -
   41.11 -#endif /* GC_SPAWN_H */

    42.1 --- a/gclib/strfuncs.c	Fri Jan 27 00:28:11 2012 +0000
    42.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    42.3 @@ -1,26 +0,0 @@
    42.4 -#include <stdlib.h>
    42.5 -#include <string.h>
    42.6 -#include <gclib/mem.h>
    42.7 -#include <gclib/strfuncs.h>
    42.8 -
    42.9 -/*
   42.10 - * Like strndup, but only returns NULL if str is NULL.
   42.11 - * Note that this routine copies n bytes rather than n characters.
   42.12 - */
   42.13 -char *str_ndup(const char *str,size_t n)
   42.14 -{
   42.15 -    char *dup;
   42.16 -    if (!str)
   42.17 -	return NULL;
   42.18 -    dup=mem_alloc0(n+1,1);
   42.19 -    strncpy(dup,str,n);
   42.20 -    return dup;
   42.21 -}
   42.22 -
   42.23 -/*
   42.24 - * Like strdup, but only returns NULL if str is NULL.
   42.25 - */
   42.26 -char *str_dup(const char *str)
   42.27 -{
   42.28 -    return str_ndup(str,strlen(str));
   42.29 -}

    43.1 --- a/gclib/strfuncs.h	Fri Jan 27 00:28:11 2012 +0000
    43.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.3 @@ -1,7 +0,0 @@
    43.4 -#ifndef GC_STRFUNCS_H
    43.5 -#define GC_STRFUNCS_H
    43.6 -
    43.7 -char *str_dup(const char *str);
    43.8 -char *str_ndup(const char *str,size_t n);
    43.9 -
   43.10 -#endif /* GC_STRFUNCS_H */

    44.1 --- a/gclib/textfileutils.c	Fri Jan 27 00:28:11 2012 +0000
    44.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    44.3 @@ -1,33 +0,0 @@
    44.4 -#include <stdlib.h>
    44.5 -#include <stdio.h>
    44.6 -#include <gclib/gclib.h>
    44.7 -
    44.8 -/*
    44.9 - * Read a file into memory (which should be freed with mem_free when no
   44.10 - * longer required). Returns NULL on error and outputs a suitable error
   44.11 - * message to stderr.
   44.12 - * DOS-style line endings are handled transparently even on platforms which
   44.13 - * don't normally use this format.
   44.14 - */
   44.15 -boolean file_get_contents_text(const char *filename,char **contents,
   44.16 -  size_t *length)
   44.17 -{
   44.18 -    int i;
   44.19 -    char *raw;
   44.20 -    size_t raw_length;
   44.21 -    String *string;
   44.22 -    if (!file_get_contents(filename,&raw,&raw_length))
   44.23 -	return FALSE;
   44.24 -    string=string_new(NULL);
   44.25 -    for(i=0;i<raw_length;i++)
   44.26 -	if (raw[i]!='\r')
   44.27 -	    string_append_c(string,raw[i]);
   44.28 -    mem_free(raw);
   44.29 -    if (length)
   44.30 -	*length=string->len;
   44.31 -    if (contents)
   44.32 -	*contents=string_free(string,FALSE);
   44.33 -    else
   44.34 -	string_free(string,TRUE);
   44.35 -    return TRUE;
   44.36 -}

    45.1 --- a/gclib/textfileutils.h	Fri Jan 27 00:28:11 2012 +0000
    45.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.3 @@ -1,9 +0,0 @@
    45.4 -#ifndef GC_TEXTFILEUTILS_H
    45.5 -#define GC_TEXTFILEUTILS_H
    45.6 -
    45.7 -#include <gclib/gclib.h>
    45.8 -
    45.9 -boolean file_get_contents_text(const char *filename,char **contents,
   45.10 -  size_t *length);
   45.11 -
   45.12 -#endif /* GC_TEXTFILEUTILS_H */

    46.1 --- a/gclib/types.h	Fri Jan 27 00:28:11 2012 +0000
    46.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.3 @@ -1,6 +0,0 @@
    46.4 -#ifndef GC_TYPES_H
    46.5 -#define GC_TYPES_H
    46.6 -
    46.7 -typedef int boolean;
    46.8 -
    46.9 -#endif	/* GC_TYPES_H */

    47.1 --- a/gclib/utils.c	Fri Jan 27 00:28:11 2012 +0000
    47.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    47.3 @@ -1,46 +0,0 @@
    47.4 -#include <stdlib.h>
    47.5 -#include <string.h>
    47.6 -#include <unistd.h>
    47.7 -#include <gclib/mem.h>
    47.8 -#include <gclib/strfuncs.h>
    47.9 -#include <gclib/utils.h>
   47.10 -
   47.11 -#define is_valid_drive(d)	((d)>='a' && (d)<='z' || (d)>='A' && (d)<='Z')
   47.12 -
   47.13 -/*
   47.14 - * Gets the last component of the filename. If filename ends with a directory
   47.15 - * separator it gets the component before the last slash. If filename consists
   47.16 - * only of directory separators (and on Windows, possibly a drive letter), a
   47.17 - * single separator is returned. If filename is empty, it gets ".".
   47.18 - */
   47.19 -char *path_get_basename(const char *filename)
   47.20 -{
   47.21 -    ssize_t base,last_nonslash;
   47.22 -    size_t len;
   47.23 -    char *retval;
   47.24 -    if (*filename=='\0')
   47.25 -        return str_dup(".");
   47.26 -    last_nonslash=strlen(filename)-1;
   47.27 -    while (last_nonslash>=0 && GC_IS_DIR_SEPARATOR(filename[last_nonslash]))
   47.28 -	last_nonslash--;
   47.29 -    if (last_nonslash<0)
   47.30 -	/* string only containing slashes */
   47.31 -    return str_dup(GC_DIR_SEPARATOR_S);
   47.32 -#ifdef WIN32
   47.33 -    if (last_nonslash==1 && is_valid_drive(filename[0]) && filename[1]==':')
   47.34 -	/* string only containing slashes and a drive */
   47.35 -	return str_dup(GC_DIR_SEPARATOR_S);
   47.36 -#endif
   47.37 -    base=last_nonslash;
   47.38 -    while (base>=0 && !GC_IS_DIR_SEPARATOR(filename[base]))
   47.39 -	base--;
   47.40 -#ifdef WIN32
   47.41 -    if (base==-1 && is_valid_drive(filename[0]) && filename[1] == ':')
   47.42 -	  base=1;
   47.43 -#endif
   47.44 -    len=last_nonslash-base;
   47.45 -    retval=mem_alloc(len+1,1);
   47.46 -    memcpy(retval,filename+base+1,len);
   47.47 -    retval[len]='\0';
   47.48 -    return retval;
   47.49 -}

    48.1 --- a/gclib/utils.h	Fri Jan 27 00:28:11 2012 +0000
    48.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.3 @@ -1,16 +0,0 @@
    48.4 -#ifndef GC_UTIL_H
    48.5 -#define GC_UTIL_H
    48.6 -
    48.7 -#ifdef WIN32
    48.8 -#define GC_DIR_SEPARATOR '\\'
    48.9 -#define GC_DIR_SEPARATOR_S "\\"
   48.10 -#define GC_IS_DIR_SEPARATOR(c) ((c)==GC_DIR_SEPARATOR || (c)=='/')
   48.11 -#else
   48.12 -#define GC_DIR_SEPARATOR '/'
   48.13 -#define GC_DIR_SEPARATOR_S "/"
   48.14 -#define GC_IS_DIR_SEPARATOR(c) ((c)==GC_DIR_SEPARATOR)
   48.15 -#endif
   48.16 -
   48.17 -char *path_get_basename(const char *filename);
   48.18 -
   48.19 -#endif /* GC_UTIL_H */

    49.1 --- a/gutcheck/Makefile.am	Fri Jan 27 00:28:11 2012 +0000
    49.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.3 @@ -1,8 +0,0 @@
    49.4 -bin_PROGRAMS=gutcheck
    49.5 -pkgdata_DATA=gutcheck.typ
    49.6 -
    49.7 -gutcheck.typ:	gutcheck.typ.in
    49.8 -	sed 's/$$/\r/' $< > $@
    49.9 -
   49.10 -EXTRA_DIST=gutcheck.typ.in
   49.11 -CLEANFILES=gutcheck.typ

    50.1 --- a/gutcheck/gutcheck.c	Fri Jan 27 00:28:11 2012 +0000
    50.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    50.3 @@ -1,2982 +0,0 @@
    50.4 -/*************************************************************************/
    50.5 -/* gutcheck - check for assorted weirdnesses in a PG candidate text file */
    50.6 -/*                                                                       */
    50.7 -/* Version 0.991                                                         */
    50.8 -/* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>                  */
    50.9 -/*                                                                       */
   50.10 -/* This program is free software; you can redistribute it and/or modify  */
   50.11 -/* it under the terms of the GNU General Public License as published by  */
   50.12 -/* the Free Software Foundation; either version 2 of the License, or     */
   50.13 -/* (at your option) any later version.                                   */
   50.14 -/*                                                                       */
   50.15 -/* This program is distributed in the hope that it will be useful,       */
   50.16 -/* but WITHOUT ANY WARRANTY; without even the implied warranty of        */
   50.17 -/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         */
   50.18 -/* GNU General Public License for more details.                          */
   50.19 -/*                                                                       */
   50.20 -/* You should have received a copy of the GNU General Public License     */
   50.21 -/* along with this program; if not, write to the                         */
   50.22 -/*      Free Software Foundation, Inc.,                                  */
   50.23 -/*      59 Temple Place,                                                 */
   50.24 -/*      Suite 330,                                                       */
   50.25 -/*      Boston, MA  02111-1307  USA                                      */
   50.26 -/*                                                                       */
   50.27 -/*                                                                       */
   50.28 -/*                                                                       */
   50.29 -/* Overview comments:                                                    */
   50.30 -/*                                                                       */
   50.31 -/* If you're reading this, you're either interested in how to detect     */
   50.32 -/* formatting errors, or very very bored.                                */
   50.33 -/*                                                                       */
   50.34 -/* Gutcheck is a homebrew formatting checker specifically for            */
   50.35 -/* spotting common formatting problems in a PG e-text. I typically       */
   50.36 -/* run it once or twice on a file I'm about to submit; it usually        */
   50.37 -/* finds a few formatting problems. It also usually finds lots of        */
   50.38 -/* queries that aren't problems at all; it _really_ doesn't like         */
   50.39 -/* the standard PG header, for example.  It's optimized for straight     */
   50.40 -/* prose; poetry and non-fiction involving tables tend to trigger        */
   50.41 -/* false alarms.                                                         */
   50.42 -/*                                                                       */
   50.43 -/* The code of gutcheck is not very interesting, but the experience      */
   50.44 -/* of what constitutes a possible error may be, and the best way to      */
   50.45 -/* illustrate that is by example.                                        */
   50.46 -/*                                                                       */
   50.47 -/*                                                                       */
   50.48 -/* Here are some common typos found in PG texts that gutcheck            */
   50.49 -/* will flag as errors:                                                  */
   50.50 -/*                                                                       */
   50.51 -/* "Look!John , over there!"                                             */
   50.52 -/* <this is a HTML tag>                                                  */
   50.53 -/* &so is this;                                                          */
   50.54 -/* Margaret said: " Now you should start for school."                    */
   50.55 -/* Margaret said: "Now you should start for school. (if end of para)     */
   50.56 -/* The horse is said to he worth a lot.                                  */
   50.57 -/* 0K - this'11 make you look close1y.                                   */
   50.58 -/* "If you do. you'll regret it!"                                        */
   50.59 -/*                                                                       */
   50.60 -/* There are some complications . The extra space left around that       */
   50.61 -/* period was an error . . . but that ellipsis wasn't.                   */
   50.62 -/*                                                                       */
   50.63 -/* The last line of a paragraph                                          */
   50.64 -/* is usually short.                                                     */
   50.65 -/*                                                                       */
   50.66 -/* This period is an error.But the periods in a.m. aren't.               */
   50.67 -/*                                                                       */
   50.68 -/* Checks that are do-able but not (well) implemented are:               */
   50.69 -/*        Single-quote checking.                                         */
   50.70 -/*          Despite 3 attempts at it, singlequote checking is still      */
   50.71 -/*          crap in gutcheck. It may not be possible without analysis    */
   50.72 -/*          of the whole paragraph.                                      */
   50.73 -/*                                                                       */
   50.74 -/*************************************************************************/
   50.75 -
   50.76 -
   50.77 -#include <stdio.h>
   50.78 -#include <stdlib.h>
   50.79 -#include <string.h>
   50.80 -#include <ctype.h>
   50.81 -
   50.82 -#define MAXWORDLEN    80    /* max length of one word             */
   50.83 -#define LINEBUFSIZE 2048    /* buffer size for an input line      */
   50.84 -
   50.85 -#define MAX_USER_TYPOS 1000 /* number of slots in the usertypo[] table */
   50.86 -#define USERTYPO_FILE "gutcheck.typ" /* NOTE(review): presumably the file user typos are read from - confirm */
   50.87 -
   50.88 -#ifndef MAX_PATH /* fallback for when no earlier header has defined MAX_PATH */
   50.89 -#define MAX_PATH 16384
   50.90 -#endif
   50.91 -
   50.92 -char aline[LINEBUFSIZE]; /* line-sized buffer; presumably the line being checked - confirm */
   50.93 -char prevline[LINEBUFSIZE]; /* line-sized buffer; presumably the line before it - confirm */
   50.94 -
   50.95 -                 /* Common typos, all in lower case. The list is terminated by an empty string. */
   50.96 -char *typo[] = { "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane", "nad",
   50.97 -                "te", "ig", "acn",  "ahve", "alot", "anbd", "andt", "awya", "aywa", "bakc", "om",
   50.98 -                "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt", "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr",
   50.99 -                "hmi", "hse", "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd", "gerat", "goign",
  50.100 -                "gruop", "haev", "hda", "hearign", "seeign", "sayign", "herat", "hge", "hsa", "hsi", "hte", "htere",
  50.101 -                "htese", "htey", "htis", "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut", "loev",
  50.102 -                "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter", "omre", "onyl", "otehr", "otu", "owrk",
  50.103 -                "owuld", "peice", "peices", "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",
  50.104 -                "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe", "sohw", "stnad", "stopry",
  50.105 -                "stoyr", "stpo", "tahn", "taht", "tath", "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge",
  50.106 -                "thier", "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne", "tirne", "tkae",
  50.107 -                "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey", "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih",
  50.108 -                "whihc", "whta", "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",
  50.109 -                "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking", "wtih", "wuould", "wya", "yera",
  50.110 -                "yeras", "yersa", "yoiu", "youve", "ytou", "yuor",
  50.111 -                /* Words with h and b confused (e.g. "bave", "hecause"), added for version 12; a few containing "tbe" were removed in v.25 */
  50.112 -                "abead", "ahle", "ahout", "ahove", "altbough", "balf", "bardly", "bas", "bave", "baving", "bebind", 
  50.113 -                "beld", "belp", "belped", "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge", "dehates", 
  50.114 -                "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan", "hegin", "heing", 
  50.115 -                "helieve", "henefit", "hetter", "hetween", "heyond", "hig", "higber", "huild", "huy", "hy", "jobn", "joh", 
  50.116 -                "meanwbile", "memher", "memhers", "numher", "numhers", 
  50.117 -                "perbaps", "prohlem", "puhlic", "witbout", 
  50.118 -                /* A few more entries, added for version .18 */
  50.119 -                "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud", "prornise", "prornised", "modem", "bo",
  50.120 -                "heside", "chapteb", "chaptee", "se",
  50.121 -                 ""};
  50.122 -
  50.123 -char *usertypo[MAX_USER_TYPOS]; /* up to MAX_USER_TYPOS additional typo strings; NOTE(review): presumably loaded from USERTYPO_FILE - confirm */
  50.124 -
  50.125 -                 /* Common abbreviations and other OK words not to query as typos. */
  50.126 -                 /* 0.99 last-minute - removed "ms"      */
  50.127 -char *okword[] = {"mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm", "rd", "sh", "br",
  50.128 -                  "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "pompeii","hawaii","hawaiian",
  50.129 -                  "hotbed", "heartbeat", "heartbeats", "outbid", "outbids", "frostbite", "frostbitten",
  50.130 -                  ""};
  50.131 -
  50.132 -                 /* Common abbreviations that cause otherwise unexplained periods. */
  50.133 -char *abbrev[] = {"cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op", "cit",
  50.134 -                  "deg", "min", "chap", "oz", "mme", "mlle", "mssrs",
  50.135 -                  ""};
  50.136 -                 /* Two-Letter combinations that rarely if ever start words, */
  50.137 -                 /* but are common scannos or otherwise common letter        */
  50.138 -                 /* combinations.                                            */
  50.139 -char *nostart[] = { "hr", "hl", "cb", "sb", "tb", "wb", "tl",
  50.140 -                    "tn", "rn", "lt", "tj",
  50.141 -                    "" };
  50.142 -
  50.143 -                 /* Two-Letter combinations that rarely if ever end words    */
  50.144 -                 /* but are common scannos or otherwise common letter        */
  50.145 -                 /* combinations                                             */
  50.146 -char *noend[]   = { "cb", "gb", "pb", "sb", "tb", 
  50.147 -                    "wh","fr","br","qu","tw","gl","fl","sw","gr","sl","cl",
  50.148 -                    "iy",
  50.149 -                    ""};
  50.150 -
  50.151 -char *markup[]  = { "a", "b", "big", "blockquote", "body", "br", "center", 
  50.152 -                    "col", "div", "em", "font", "h1", "h2", "h3", "h4", 
  50.153 -                    "h5", "h6", "head", "hr", "html", "i", "img", "li", 
  50.154 -                    "meta", "ol", "p", "pre", "small", "span", "strong", 
  50.155 -                    "sub", "sup", "table", "td", "tfoot", "thead", "title", 
  50.156 -                    "tr", "tt", "u", "ul", 
  50.157 -                    ""};
  50.158 -
  50.159 -char *DPmarkup[] = { "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>",
  50.160 -                    ""}; /* <tb> added .991 */
  50.161 -
  50.162 -char *nocomma[]  = { "the", "it's", "their", "an", "mrs", "a", "our", "that's",
  50.163 -                     "its", "whose", "every", "i'll", "your", "my", 
  50.164 -                     "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "rd", 
  50.165 -                     "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", 
  50.166 -                     "i'm", "during", "let", "toward", "among",
  50.167 -                     ""};
  50.168 -
  50.169 -
  50.170 -char *noperiod[] = { "every", "i'm", "during", "that's", "their", "your", "our", "my", "or", 
  50.171 -                     "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether", 
  50.172 -                     "i'll", "whose", "who", "because", "when", "let", "till", "very",
  50.173 -                     "an", "among", "those", "into", "whom", "having", "thence",
  50.174 -                     ""}; 
  50.175 -
  50.176 -
  50.177 -char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü";  /* Lower-case vowels, plain and accented (Carlo's old suggestion, updated .991) */
  50.178 -
  50.179 -struct {
  50.180 -    char *htmlent;
  50.181 -    char *htmlnum;
  50.182 -    char *textent;
  50.183 -    } entities[] = { "&amp;",           "&#38;",        "&", 
  50.184 -                     "&lt;",            "&#60;",        "<",
  50.185 -                     "&gt;",            "&#62;",        ">",
  50.186 -                     "&deg;",           "&#176;",       " degrees",
  50.187 -                     "&pound;",         "&#163;",       "L",
  50.188 -                     "&quot;",          "&#34;",        "\"",   /* -- quotation mark = APL quote, */
  50.189 -                     "&OElig;",         "&#338;",       "OE",  /* -- latin capital ligature OE, */
  50.190 -                     "&oelig;",         "&#339;",       "oe",  /* -- latin small ligature oe, U+0153 ISOlat2 --> */
  50.191 -                     "&Scaron;",        "&#352;",       "S",  /* -- latin capital letter S with caron, */
  50.192 -                     "&scaron;",        "&#353;",       "s",  /* -- latin small letter s with caron, */
  50.193 -                     "&Yuml;",          "&#376;",       "Y",  /* -- latin capital letter Y with diaeresis, */
  50.194 -                     "&circ;",          "&#710;",       "",  /* -- modifier letter circumflex accent, */
  50.195 -                     "&tilde;",         "&#732;",       "~",  /* -- small tilde, U+02DC ISOdia --> */
  50.196 -                     "&ensp;",          "&#8194;",      " ", /* -- en space, U+2002 ISOpub --> */
  50.197 -                     "&emsp;",          "&#8195;",      " ", /* -- em space, U+2003 ISOpub --> */
  50.198 -                     "&thinsp;",        "&#8201;",      " ", /* -- thin space, U+2009 ISOpub --> */
  50.199 -                     "&ndash;",         "&#8211;",      "-", /* -- en dash, U+2013 ISOpub --> */
  50.200 -                     "&mdash;",         "&#8212;",      "--", /* -- em dash, U+2014 ISOpub --> */
  50.201 -                     "&lsquo;",         "&#8216;",      "'", /* -- left single quotation mark, */
  50.202 -                     "&rsquo;",         "&#8217;",      "'", /* -- right single quotation mark, */
  50.203 -                     "&sbquo;",         "&#8218;",      "'", /* -- single low-9 quotation mark, U+201A NEW --> */
  50.204 -                     "&ldquo;",         "&#8220;",      "\"", /* -- left double quotation mark, */
  50.205 -                     "&rdquo;",         "&#8221;",      "\"", /* -- right double quotation mark, */
  50.206 -                     "&bdquo;",         "&#8222;",      "\"", /* -- double low-9 quotation mark, U+201E NEW --> */
  50.207 -                     "&lsaquo;",        "&#8249;",      "\"", /* -- single left-pointing angle quotation mark, */
  50.208 -                     "&rsaquo;",        "&#8250;",      "\"", /* -- single right-pointing angle quotation mark, */
  50.209 -                     "&nbsp;",          "&#160;",       " ", /* -- no-break space = non-breaking space, */
  50.210 -                     "&iexcl;",         "&#161;",       "!", /* -- inverted exclamation mark, U+00A1 ISOnum --> */
  50.211 -                     "&cent;",          "&#162;",       "c", /* -- cent sign, U+00A2 ISOnum --> */
  50.212 -                     "&pound;",         "&#163;",       "L", /* -- pound sign, U+00A3 ISOnum --> */
  50.213 -                     "&curren;",        "&#164;",       "$", /* -- currency sign, U+00A4 ISOnum --> */
  50.214 -                     "&yen;",           "&#165;",       "Y", /* -- yen sign = yuan sign, U+00A5 ISOnum --> */
  50.215 -                     "&sect;",          "&#167;",       "--", /* -- section sign, U+00A7 ISOnum --> */
  50.216 -                     "&uml;",           "&#168;",       " ", /* -- diaeresis = spacing diaeresis, */
  50.217 -                     "&copy;",          "&#169;",       "(C) ", /* -- copyright sign, U+00A9 ISOnum --> */
  50.218 -                     "&ordf;",          "&#170;",       " ", /* -- feminine ordinal indicator, U+00AA ISOnum --> */
  50.219 -                     "&laquo;",         "&#171;",       "\"", /* -- left-pointing double angle quotation mark */
  50.220 -                     "&shy;",           "&#173;",       "-", /* -- soft hyphen = discretionary hyphen, */
  50.221 -                     "&reg;",           "&#174;",       "(R) ", /* -- registered sign = registered trade mark sign, */
  50.222 -                     "&macr;",          "&#175;",       " ", /* -- macron = spacing macron = overline */
  50.223 -                     "&deg;",           "&#176;",       " degrees", /* -- degree sign, U+00B0 ISOnum --> */
  50.224 -                     "&plusmn;",        "&#177;",       "+-", /* -- plus-minus sign = plus-or-minus sign, */
  50.225 -                     "&sup2;",          "&#178;",       "2", /* -- superscript two = superscript digit two */
  50.226 -                     "&sup3;",          "&#179;",       "3", /* -- superscript three = superscript digit three */
  50.227 -                     "&acute;",         "&#180;",       " ", /* -- acute accent = spacing acute, */
  50.228 -                     "&micro;",         "&#181;",       "m", /* -- micro sign, U+00B5 ISOnum --> */
  50.229 -                     "&para;",          "&#182;",       "--", /* -- pilcrow sign = paragraph sign, */
  50.230 -                     "&cedil;",         "&#184;",       " ", /* -- cedilla = spacing cedilla, U+00B8 ISOdia --> */
  50.231 -                     "&sup1;",          "&#185;",       "1", /* -- superscript one = superscript digit one, */
  50.232 -                     "&ordm;",          "&#186;",       " ", /* -- masculine ordinal indicator, */
  50.233 -                     "&raquo;",         "&#187;",       "\"", /* -- right-pointing double angle quotation mark */
  50.234 -                     "&frac14;",        "&#188;",       "1/4", /* -- vulgar fraction one quarter */
  50.235 -                     "&frac12;",        "&#189;",       "1/2", /* -- vulgar fraction one half */
  50.236 -                     "&frac34;",        "&#190;",       "3/4", /* -- vulgar fraction three quarters */
  50.237 -                     "&iquest;",        "&#191;",       "?", /* -- inverted question mark */
  50.238 -                     "&Agrave;",        "&#192;",       "A", /* -- latin capital letter A with grave */
  50.239 -                     "&Aacute;",        "&#193;",       "A", /* -- latin capital letter A with acute, */
  50.240 -                     "&Acirc;",         "&#194;",       "A", /* -- latin capital letter A with circumflex, */
  50.241 -                     "&Atilde;",        "&#195;",       "A", /* -- latin capital letter A with tilde, */
  50.242 -                     "&Auml;",          "&#196;",       "A", /* -- latin capital letter A with diaeresis, */
  50.243 -                     "&Aring;",         "&#197;",       "A", /* -- latin capital letter A with ring above */
  50.244 -                     "&AElig;",         "&#198;",       "AE", /* -- latin capital letter AE */
  50.245 -                     "&Ccedil;",        "&#199;",       "C", /* -- latin capital letter C with cedilla, */
  50.246 -                     "&Egrave;",        "&#200;",       "E", /* -- latin capital letter E with grave, */
  50.247 -                     "&Eacute;",        "&#201;",       "E", /* -- latin capital letter E with acute, */
  50.248 -                     "&Ecirc;",         "&#202;",       "E", /* -- latin capital letter E with circumflex, */
  50.249 -                     "&Euml;",          "&#203;",       "E", /* -- latin capital letter E with diaeresis, */
  50.250 -                     "&Igrave;",        "&#204;",       "I", /* -- latin capital letter I with grave, */
  50.251 -                     "&Iacute;",        "&#205;",       "I", /* -- latin capital letter I with acute, */
  50.252 -                     "&Icirc;",         "&#206;",       "I", /* -- latin capital letter I with circumflex, */
  50.253 -                     "&Iuml;",          "&#207;",       "I", /* -- latin capital letter I with diaeresis, */
  50.254 -                     "&ETH;",           "&#208;",       "E", /* -- latin capital letter ETH, U+00D0 ISOlat1 --> */
  50.255 -                     "&Ntilde;",        "&#209;",       "N", /* -- latin capital letter N with tilde, */
  50.256 -                     "&Ograve;",        "&#210;",       "O", /* -- latin capital letter O with grave, */
  50.257 -                     "&Oacute;",        "&#211;",       "O", /* -- latin capital letter O with acute, */
  50.258 -                     "&Ocirc;",         "&#212;",       "O", /* -- latin capital letter O with circumflex, */
  50.259 -                     "&Otilde;",        "&#213;",       "O", /* -- latin capital letter O with tilde, */
  50.260 -                     "&Ouml;",          "&#214;",       "O", /* -- latin capital letter O with diaeresis, */
  50.261 -                     "&times;",         "&#215;",       "*", /* -- multiplication sign, U+00D7 ISOnum --> */
  50.262 -                     "&Oslash;",        "&#216;",       "O", /* -- latin capital letter O with stroke */
  50.263 -                     "&Ugrave;",        "&#217;",       "U", /* -- latin capital letter U with grave, */
  50.264 -                     "&Uacute;",        "&#218;",       "U", /* -- latin capital letter U with acute, */
  50.265 -                     "&Ucirc;",         "&#219;",       "U", /* -- latin capital letter U with circumflex, */
  50.266 -                     "&Uuml;",          "&#220;",       "U", /* -- latin capital letter U with diaeresis, */
  50.267 -                     "&Yacute;",        "&#221;",       "Y", /* -- latin capital letter Y with acute, */
  50.268 -                     "&THORN;",         "&#222;",       "TH", /* -- latin capital letter THORN, */
  50.269 -                     "&szlig;",         "&#223;",       "sz", /* -- latin small letter sharp s = ess-zed, */
  50.270 -                     "&agrave;",        "&#224;",       "a", /* -- latin small letter a with grave */
  50.271 -                     "&aacute;",        "&#225;",       "a", /* -- latin small letter a with acute, */
  50.272 -                     "&acirc;",         "&#226;",       "a", /* -- latin small letter a with circumflex, */
  50.273 -                     "&atilde;",        "&#227;",       "a", /* -- latin small letter a with tilde, */
  50.274 -                     "&auml;",          "&#228;",       "a", /* -- latin small letter a with diaeresis, */
  50.275 -                     "&aring;",         "&#229;",       "a", /* -- latin small letter a with ring above */
  50.276 -                     "&aelig;",         "&#230;",       "ae", /* -- latin small letter ae */
  50.277 -                     "&ccedil;",        "&#231;",       "c", /* -- latin small letter c with cedilla, */
  50.278 -                     "&egrave;",        "&#232;",       "e", /* -- latin small letter e with grave, */
  50.279 -                     "&eacute;",        "&#233;",       "e", /* -- latin small letter e with acute, */
  50.280 -                     "&ecirc;",         "&#234;",       "e", /* -- latin small letter e with circumflex, */
  50.281 -                     "&euml;",          "&#235;",       "e", /* -- latin small letter e with diaeresis, */
  50.282 -                     "&igrave;",        "&#236;",       "i", /* -- latin small letter i with grave, */
  50.283 -                     "&iacute;",        "&#237;",       "i", /* -- latin small letter i with acute, */
  50.284 -                     "&icirc;",         "&#238;",       "i", /* -- latin small letter i with circumflex, */
  50.285 -                     "&iuml;",          "&#239;",       "i", /* -- latin small letter i with diaeresis, */
  50.286 -                     "&eth;",           "&#240;",       "eth", /* -- latin small letter eth, U+00F0 ISOlat1 --> */
  50.287 -                     "&ntilde;",        "&#241;",       "n", /* -- latin small letter n with tilde, */
  50.288 -                     "&ograve;",        "&#242;",       "o", /* -- latin small letter o with grave, */
  50.289 -                     "&oacute;",        "&#243;",       "o", /* -- latin small letter o with acute, */
  50.290 -                     "&ocirc;",         "&#244;",       "o", /* -- latin small letter o with circumflex, */
  50.291 -                     "&otilde;",        "&#245;",       "o", /* -- latin small letter o with tilde, */
  50.292 -                     "&ouml;",          "&#246;",       "o", /* -- latin small letter o with diaeresis, */
  50.293 -                     "&divide;",        "&#247;",       "/", /* -- division sign, U+00F7 ISOnum --> */
  50.294 -                     "&oslash;",        "&#248;",       "o", /* -- latin small letter o with stroke, */
  50.295 -                     "&ugrave;",        "&#249;",       "u", /* -- latin small letter u with grave, */
  50.296 -                     "&uacute;",        "&#250;",       "u", /* -- latin small letter u with acute, */
  50.297 -                     "&ucirc;",         "&#251;",       "u", /* -- latin small letter u with circumflex, */
  50.298 -                     "&uuml;",          "&#252;",       "u", /* -- latin small letter u with diaeresis, */
  50.299 -                     "&yacute;",        "&#253;",       "y", /* -- latin small letter y with acute, */
  50.300 -                     "&thorn;",         "&#254;",       "th", /* -- latin small letter thorn, */
  50.301 -                     "&yuml;",          "&#255;",       "y", /* -- latin small letter y with diaeresis, */
  50.302 -                      "", "" };
  50.303 -                    
/* ---- list of special characters ---- */
/* The first group is written as decimal character codes; the note beside */
/* each one shows the ASCII character that has that code.                  */
#define CHAR_SPACE        32      /* ' '  space                  */
#define CHAR_TAB           9      /* '\t' horizontal tab         */
#define CHAR_LF           10      /* '\n' line feed              */
#define CHAR_CR           13      /* '\r' carriage return        */
#define CHAR_DQUOTE       34      /* '"'  double quote           */
#define CHAR_SQUOTE       39      /* '\'' apostrophe             */
#define CHAR_OPEN_SQUOTE  96      /* '`'  backquote              */
#define CHAR_TILDE       126      /* '~'                         */
#define CHAR_ASTERISK     42      /* '*'                         */
#define CHAR_FORESLASH    47      /* '/'  forward slash          */
#define CHAR_CARAT        94      /* '^'  caret                  */

/* The second group is written directly as character literals. */
#define CHAR_UNDERSCORE    '_'
#define CHAR_OPEN_CBRACK   '{'
#define CHAR_CLOSE_CBRACK  '}'
#define CHAR_OPEN_RBRACK   '('
#define CHAR_CLOSE_RBRACK  ')'
#define CHAR_OPEN_SBRACK   '['
#define CHAR_CLOSE_SBRACK  ']'
  50.324 -
  50.325 -
  50.326 -
  50.327 -
  50.328 -
/* ---- longest and shortest normal PG line lengths ---- */
/* The first pass in procfile() counts a line as long when its length     */
/* exceeds LONGEST_PG_LINE and as very long when it exceeds WAY_TOO_LONG; */
/* SHORTEST_PG_LINE is the length it compares against when counting       */
/* short lines.                                                           */
#define LONGEST_PG_LINE   75
#define WAY_TOO_LONG      80
#define SHORTEST_PG_LINE  55
  50.333 -
/* Command-line switch letters.  main() upper-cases every character that   */
/* follows a '-' and sets pswit[i] for the letter found at SWITCHES[i], so */
/* the *_SWITCH index macros below must stay in the same order as the      */
/* letters in this string.  After parsing, main() inverts the E, L and X   */
/* entries, so those three features are ON unless their switch is given.   */
#define SWITCHES "ESTPXLOYHWVMUD" /* switches:-                            */
                                  /*     D - ignore DP-specific markup     */
                                  /*     E - echo queried line; on by      */
                                  /*         default, -E turns it off      */
                                  /*     S - check single quotes           */
                                  /*     T - check common typos; main()    */
                                  /*         inverts it again whenever     */
                                  /*         paranoid mode is on           */
                                  /*     P - require closure of quotes on  */
                                  /*         every paragraph               */
                                  /*     X - "Trust no one" :-) Paranoid!  */
                                  /*         Queries everything; on by     */
                                  /*         default, -X turns it off      */
                                  /*     L - line end checking defaults on */
                                  /*         -L turns it off               */
                                  /*     O - overview. Just shows counts.  */
                                  /*         Also switches echoing off     */
                                  /*     Y - puts errors to stdout         */
                                  /*         instead of stderr             */
                                  /*     H - Echoes header fields          */
                                  /*     M - Ignore markup in < >          */
                                  /*     U - Use file of User-defined Typos*/
                                  /*     W - Defaults for use on Web upload*/
                                  /*         (main() overwrites every      */
                                  /*         other switch with fixed values)*/
                                  /*     V - Verbose - list EVERYTHING!    */
#define SWITNO 14                 /* max number of switch parms            */
                                  /*        - used for defining array-size */
                                  /*        - equals the number of letters */
                                  /*          in SWITCHES                  */
#define MINARGS   1               /* minimum no of args excl switches      */
#define MAXARGS   1               /* maximum no of args excl switches      */

int pswit[SWITNO];                /* program switches set by SWITCHES      */

/* Indices into pswit[]; each value is the position of the corresponding */
/* letter in SWITCHES (E=0, S=1, T=2, P=3, X=4, L=5, O=6, Y=7, H=8, W=9, */
/* V=10, M=11, U=12, D=13).                                              */
#define ECHO_SWITCH      0
#define SQUOTE_SWITCH    1
#define TYPO_SWITCH      2
#define QPARA_SWITCH     3
#define PARANOID_SWITCH  4
#define LINE_END_SWITCH  5
#define OVERVIEW_SWITCH  6
#define STDOUT_SWITCH    7
#define HEADER_SWITCH    8
#define WEB_SWITCH       9
#define VERBOSE_SWITCH   10
#define MARKUP_SWITCH    11
#define USERTYPO_SWITCH  12
#define DP_SWITCH        13
  50.374 -
  50.375 -
  50.376 -
/* Query counters.  main() sets every cnt_* counter to zero before calling */
/* procfile() and, when the overview switch is set, prints the non-zero    */
/* ones followed by their total.  cnt_spacend is not part of that total;   */
/* procfile() reports it itself after its first pass.  linecnt and         */
/* checked_linecnt are reset by procfile() and appear in the first line of */
/* the overview.                                                           */
long cnt_dquot;       /* for overview mode, count of doublequote queries */
long cnt_squot;       /* for overview mode, count of singlequote queries */
long cnt_brack;       /* for overview mode, count of brackets queries */
long cnt_bin;         /* for overview mode, count of non-ASCII queries */
long cnt_odd;         /* for overview mode, count of odd character queries */
long cnt_long;        /* for overview mode, count of long line errors */
long cnt_short;       /* for overview mode, count of short line queries */
long cnt_punct;       /* for overview mode, count of punctuation and spacing queries */
long cnt_dash;        /* for overview mode, count of dash-related queries */
long cnt_word;        /* for overview mode, count of word queries */
long cnt_html;        /* for overview mode, count of html queries */
long cnt_lineend;     /* for overview mode, count of line-end queries */
long cnt_spacend;     /* count of lines with space at end  V .21 */
long linecnt;         /* count of total lines in the file */
long checked_linecnt; /* count of lines actually gutchecked V .26 */
  50.392 -
void proghelp(void);    /* called by main() when the argument count is wrong */
void procfile(char *);  /* process one file; the argument is its name */

/* NOTE(review): the six pairs of constants below are not referenced in   */
/* the part of the file reviewed here; their names suggest selector       */
/* arguments for helper functions - confirm against the callers.          */
#define LOW_THRESHOLD    0
#define HIGH_THRESHOLD   1

#define START 0
#define END 1
#define PREV 0
#define NEXT 1
#define FIRST_OF_PAIR 0
#define SECOND_OF_PAIR 1

#define MAX_WORDPAIR 1000

/* Set by main() from argv[0]: the text up to and including the last '/'  */
/* or '\\', or an empty string when argv[0] contains neither.             */
char running_from[MAX_PATH];

/* Helper prototypes.  The notes describe only how the visible callers    */
/* use each function; the definitions are elsewhere in the file.          */
int mixdigit(char *);
char *getaword(char *, char *);  /* procfile: s = getaword(s, inword), then inword is compared with strcmp() */
int matchword(char *, char *);
char *flgets(char *, int, FILE *, long);  /* called as (buffer, LINEBUFSIZE-1, file, line count); a null result ends the read loop */
void lowerit(char *);            /* presumably lower-cases its argument in place: procfile calls it before searching for "end" - confirm */
int gcisalpha(unsigned char);    /* procfile counts the characters for which this is non-zero as alphabetic */
int gcisdigit(unsigned char);
int gcisletter(unsigned char);
char *gcstrchr(char *s, char c);
void postprocess_for_HTML(char *);
char *linehasmarkup(char *);
char *losemarkup(char *);
int tagcomp(char *, char *);
char *loseentities(char *);
int isroman(char *);
int usertypo_count;              /* number of user typo strings main() has stored in usertypo[] */
void postprocess_for_DP(char *);

char wrk[LINEBUFSIZE];

/* This is disgustingly lazy, predefining max words & lengths,   */
/* but now I'm out of 16-bit restrictions, what's a couple of K? */
/* procfile() zeroes dupcnt[] and sets about clearing these two  */
/* tables before it reads the file.                              */
#define MAX_QWORD           50
#define MAX_QWORD_LENGTH    40
char qword[MAX_QWORD][MAX_QWORD_LENGTH];
char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
signed int dupcnt[MAX_QWORD];
  50.438 -
  50.439 -
  50.440 -
  50.441 -int main(int argc, char **argv)
  50.442 -{
  50.443 -    char *argsw, *s;
  50.444 -    int i, switno, invarg;
  50.445 -    char usertypo_file[MAX_PATH];
  50.446 -    FILE *usertypofile;
  50.447 -
  50.448 -
  50.449 -    if (strlen(argv[0]) < sizeof(running_from))
  50.450 -        strcpy(running_from, argv[0]);  /* save the path to the executable gutcheck */
  50.451 -
  50.452 -    /* find out what directory we're running from */
  50.453 -    for (s = running_from + strlen(running_from); *s != '/' && *s != '\\' && s >= running_from; s--)
  50.454 -        *s = 0;
  50.455 -
  50.456 -
  50.457 -    switno = strlen(SWITCHES);
  50.458 -    for (i = switno ; --i >0 ; )
  50.459 -        pswit[i] = 0;           /* initialise switches */
  50.460 -
  50.461 -    /* Standard loop to extract switches.                   */
  50.462 -    /* When we come out of this loop, the arguments will be */
  50.463 -    /* in argv[0] upwards and the switches used will be     */
  50.464 -    /* represented by their equivalent elements in pswit[]  */
  50.465 -    while ( --argc > 0 && **++argv == '-')
  50.466 -        for (argsw = argv[0]+1; *argsw !='\0'; argsw++)
  50.467 -            for (i = switno, invarg = 1; (--i >= 0) && invarg == 1 ; )
  50.468 -                if ((toupper(*argsw)) == SWITCHES[i] ) {
  50.469 -                    invarg = 0;
  50.470 -                    pswit[i] = 1;
  50.471 -                    }
  50.472 -
  50.473 -    pswit[PARANOID_SWITCH] ^= 1;         /* Paranoid checking is turned OFF, not on, by its switch */
  50.474 -
  50.475 -    if (pswit[PARANOID_SWITCH]) {                         /* if running in paranoid mode */
  50.476 -        pswit[TYPO_SWITCH] = pswit[TYPO_SWITCH] ^ 1;      /* force typo checks as well   */
  50.477 -        }                                                 /* v.20 removed s and p switches from paranoid mode */
  50.478 -
  50.479 -    pswit[LINE_END_SWITCH] ^= 1;         /* Line-end checking is turned OFF, not on, by its switch */
  50.480 -    pswit[ECHO_SWITCH] ^= 1;             /* V.21 Echoing is turned OFF, not on, by its switch      */
  50.481 -
  50.482 -    if (pswit[OVERVIEW_SWITCH])       /* just print summary; don't echo */
  50.483 -        pswit[ECHO_SWITCH] = 0;
  50.484 -
  50.485 -    /* Web uploads - for the moment, this is really just a placeholder     */
  50.486 -    /* until we decide what processing we really want to do on web uploads */
  50.487 -    if (pswit[WEB_SWITCH]) {          /* specific override for web uploads */
  50.488 -        pswit[ECHO_SWITCH] =     1;
  50.489 -        pswit[SQUOTE_SWITCH] =   0;
  50.490 -        pswit[TYPO_SWITCH] =     1;
  50.491 -        pswit[QPARA_SWITCH] =    0;
  50.492 -        pswit[PARANOID_SWITCH] = 1;
  50.493 -        pswit[LINE_END_SWITCH] = 0;
  50.494 -        pswit[OVERVIEW_SWITCH] = 0;
  50.495 -        pswit[STDOUT_SWITCH] =   0;
  50.496 -        pswit[HEADER_SWITCH] =   1;
  50.497 -        pswit[VERBOSE_SWITCH] =  0;
  50.498 -        pswit[MARKUP_SWITCH] =   0;
  50.499 -        pswit[USERTYPO_SWITCH] = 0;
  50.500 -        pswit[DP_SWITCH] = 0;
  50.501 -        }
  50.502 -
  50.503 -
  50.504 -    if (argc < MINARGS || argc > MAXARGS) {  /* check number of args */
  50.505 -        proghelp();
  50.506 -        return(1);            /* exit */
  50.507 -        }
  50.508 -
  50.509 -
  50.510 -    /* read in the user-defined stealth scanno list */
  50.511 -
  50.512 -    if (pswit[USERTYPO_SWITCH]) {                    /* ... we were told we had one! */
  50.513 -        if ((usertypofile = fopen(USERTYPO_FILE, "rb")) == NULL) {   /* not in cwd. try gutcheck directory. */
  50.514 -            strcpy(usertypo_file, running_from);
  50.515 -            strcat(usertypo_file, USERTYPO_FILE);
  50.516 -            if ((usertypofile = fopen(usertypo_file, "rb")) == NULL) {  /* we ain't got no user typo file! */
  50.517 -                printf("   --> I couldn't find gutcheck.typ -- proceeding without user typos.\n");
  50.518 -                }
  50.519 -            }
  50.520 -
  50.521 -        usertypo_count = 0;
  50.522 -        if (usertypofile) {  /* we managed to open a User Typo File! */
  50.523 -            if (pswit[USERTYPO_SWITCH]) {
  50.524 -                while (flgets(aline, LINEBUFSIZE-1, usertypofile, (long)usertypo_count)) {
  50.525 -                    if (strlen(aline) > 1) {
  50.526 -                        if ((int)*aline > 33) {
  50.527 -                            s = malloc(strlen(aline)+1);
  50.528 -                            if (!s) {
  50.529 -                                fprintf(stderr, "gutcheck: cannot get enough memory for user typo file!!\n");
  50.530 -                                exit(1);
  50.531 -                                }
  50.532 -                            strcpy(s, aline);
  50.533 -                            usertypo[usertypo_count] = s;
  50.534 -                            usertypo_count++;
  50.535 -                            if (usertypo_count >= MAX_USER_TYPOS) {
  50.536 -                                printf("   --> Only %d user-defined typos allowed: ignoring the rest\n");
  50.537 -                                break;
  50.538 -                                }
  50.539 -                            }
  50.540 -                        }
  50.541 -                    }
  50.542 -                }
  50.543 -            fclose(usertypofile);
  50.544 -            }
  50.545 -        }
  50.546 -
  50.547 -
  50.548 -
  50.549 -
  50.550 -    fprintf(stderr, "gutcheck: Check and report on an e-text\n");
  50.551 -
  50.552 -    cnt_dquot = cnt_squot = cnt_brack = cnt_bin = cnt_odd = cnt_long =
  50.553 -    cnt_short = cnt_punct = cnt_dash = cnt_word = cnt_html = cnt_lineend =
  50.554 -    cnt_spacend = 0;
  50.555 -
  50.556 -    procfile(argv[0]);
  50.557 -
  50.558 -    if (pswit[OVERVIEW_SWITCH]) {
  50.559 -                         printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
  50.560 -                            checked_linecnt, linecnt, linecnt - checked_linecnt);
  50.561 -                         printf("    --------------- Queries found --------------\n");
  50.562 -        if (cnt_long)    printf("    Long lines:                             %5ld\n",cnt_long);
  50.563 -        if (cnt_short)   printf("    Short lines:                            %5ld\n",cnt_short);
  50.564 -        if (cnt_lineend) printf("    Line-end problems:                      %5ld\n",cnt_lineend);
  50.565 -        if (cnt_word)    printf("    Common typos:                           %5ld\n",cnt_word);
  50.566 -        if (cnt_dquot)   printf("    Unmatched quotes:                       %5ld\n",cnt_dquot);
  50.567 -        if (cnt_squot)   printf("    Unmatched SingleQuotes:                 %5ld\n",cnt_squot);
  50.568 -        if (cnt_brack)   printf("    Unmatched brackets:                     %5ld\n",cnt_brack);
  50.569 -        if (cnt_bin)     printf("    Non-ASCII characters:                   %5ld\n",cnt_bin);
  50.570 -        if (cnt_odd)     printf("    Proofing characters:                    %5ld\n",cnt_odd);
  50.571 -        if (cnt_punct)   printf("    Punctuation & spacing queries:          %5ld\n",cnt_punct);
  50.572 -        if (cnt_dash)    printf("    Non-standard dashes:                    %5ld\n",cnt_dash);
  50.573 -        if (cnt_html)    printf("    Possible HTML tags:                     %5ld\n",cnt_html);
  50.574 -        printf("\n");
  50.575 -        printf("    TOTAL QUERIES                           %5ld\n",
  50.576 -            cnt_dquot + cnt_squot + cnt_brack + cnt_bin + cnt_odd + cnt_long +
  50.577 -            cnt_short + cnt_punct + cnt_dash + cnt_word + cnt_html + cnt_lineend);
  50.578 -        }
  50.579 -
  50.580 -    return(0);
  50.581 -}
  50.582 -
  50.583 -
  50.584 -
  50.585 -/* procfile - process one file */
  50.586 -
  50.587 -void procfile(char *filename)
  50.588 -{
  50.589 -
  50.590 -    char *s, *t, *s1, laststart, *wordstart;
  50.591 -    char inword[MAXWORDLEN], testword[MAXWORDLEN];
  50.592 -    char parastart[81];     /* first line of current para */
  50.593 -    FILE *infile;
  50.594 -    long quot, squot, firstline, alphalen, totlen, binlen,
  50.595 -         shortline, longline, verylongline, spacedash, emdash,
  50.596 -         space_emdash, non_PG_space_emdash, PG_space_emdash,
  50.597 -         footerline, dotcomma, start_para_line, astline, fslashline,
  50.598 -         standalone_digit, hyphens, htmcount, endquote_count;
  50.599 -    long spline, nspline;
  50.600 -    signed int i, j, llen, isemptyline, isacro, isellipsis, istypo, alower,
  50.601 -         eNon_A, eTab, eTilde, eAst, eFSlash, eCarat;
  50.602 -    signed int warn_short, warn_long, warn_bin, warn_dash, warn_dotcomma,
  50.603 -         warn_ast, warn_fslash, warn_digit, warn_hyphen, warn_endquote;
  50.604 -    unsigned int lastlen, lastblen;
  50.605 -    signed int s_brack, c_brack, r_brack, c_unders;
  50.606 -    signed int open_single_quote, close_single_quote, guessquote, dquotepar, squotepar;
  50.607 -    signed int isnewpara, vowel, consonant;
  50.608 -    char dquote_err[80], squote_err[80], rbrack_err[80], sbrack_err[80], cbrack_err[80],
  50.609 -         unders_err[80];
  50.610 -    signed int qword_index, qperiod_index, isdup;
  50.611 -    signed int enddash;
  50.612 -    signed int Dutchcount, isDutch, Frenchcount, isFrench;
  50.613 -
  50.614 -
  50.615 -    
  50.616 -
  50.617 -
  50.618 -    laststart = CHAR_SPACE;
  50.619 -    lastlen = lastblen = 0;
  50.620 -    *dquote_err = *squote_err = *rbrack_err = *cbrack_err = *sbrack_err =
  50.621 -        *unders_err = *prevline = 0;
  50.622 -    linecnt = firstline = alphalen = totlen = binlen =
  50.623 -        shortline = longline = spacedash = emdash = checked_linecnt =
  50.624 -        space_emdash = non_PG_space_emdash = PG_space_emdash =
  50.625 -        footerline = dotcomma = start_para_line = astline = fslashline = 
  50.626 -        standalone_digit = hyphens = htmcount = endquote_count = 0;
  50.627 -    quot = squot = s_brack = c_brack = r_brack = c_unders = 0;
  50.628 -    i = llen = isemptyline = isacro = isellipsis = istypo = 0;
  50.629 -    warn_short = warn_long = warn_bin = warn_dash = warn_dotcomma = 
  50.630 -        warn_ast = warn_fslash = warn_digit = warn_endquote = 0;
  50.631 -    isnewpara = vowel = consonant = enddash = 0;
  50.632 -    spline = nspline = 0;
  50.633 -    qword_index = qperiod_index = isdup = 0;
  50.634 -    *inword = *testword = 0;
  50.635 -    open_single_quote = close_single_quote = guessquote = dquotepar = squotepar = 0;
  50.636 -    Dutchcount = isDutch = Frenchcount = isFrench = 0;
  50.637 -
  50.638 -
  50.639 -    for (j = 0; j < MAX_QWORD; j++) {
  50.640 -        dupcnt[j] = 0;
  50.641 -        for (i = 0; i < MAX_QWORD_LENGTH; i++)
  50.642 -            qword[i][j] = 0;
  50.643 -            qperiod[i][j] = 0;
  50.644 -            }
  50.645 -
  50.646 -
  50.647 -    if ((infile = fopen(filename, "rb")) == NULL) {
  50.648 -        if (pswit[STDOUT_SWITCH])
  50.649 -            fprintf(stdout, "gutcheck: cannot open %s\n", filename);
  50.650 -        else
  50.651 -            fprintf(stderr, "gutcheck: cannot open %s\n", filename);
  50.652 -        exit(1);
  50.653 -        }
  50.654 -
  50.655 -    fprintf(stdout, "\n\nFile: %s\n\n", filename);
  50.656 -    firstline = shortline = longline = verylongline = 0;
  50.657 -
  50.658 -
  50.659 -    /*****************************************************/
  50.660 -    /*                                                   */
  50.661 -    /*  Run a first pass - verify that it's a valid PG   */
  50.662 -    /*  file, decide whether to report some things that  */
  50.663 -    /*  occur many times in the text like long or short  */
  50.664 -    /*  lines, non-standard dashes, and other good stuff */
  50.665 -    /*  I'll doubtless think of later.                   */
  50.666 -    /*                                                   */
  50.667 -    /*****************************************************/
  50.668 -
  50.669 -    /*****************************************************/
  50.670 -    /* V.24  Sigh. Yet Another Header Change             */
  50.671 -    /*****************************************************/
  50.672 -
  50.673 -    while (fgets(aline, LINEBUFSIZE-1, infile)) {
  50.674 -        while (aline[strlen(aline)-1] == 10 || aline[strlen(aline)-1] == 13 ) aline[strlen(aline)-1] = 0;
  50.675 -        linecnt++;
  50.676 -        if (strstr(aline, "*END") && strstr(aline, "SMALL PRINT") && (strstr(aline, "PUBLIC DOMAIN") || strstr(aline, "COPYRIGHT"))) {
  50.677 -            if (spline)
  50.678 -                printf("   --> Duplicate header?\n");
  50.679 -            spline = linecnt + 1;   /* first line of non-header text, that is */
  50.680 -            }
  50.681 -        if (!strncmp(aline, "*** START", 9) && strstr(aline, "PROJECT GUTENBERG")) {
  50.682 -            if (nspline)
  50.683 -                printf("   --> Duplicate header?\n");
  50.684 -            nspline = linecnt + 1;   /* first line of non-header text, that is */
  50.685 -            }
  50.686 -        if (spline || nspline) {
  50.687 -            lowerit(aline);
  50.688 -            if (strstr(aline, "end") && strstr(aline, "project gutenberg")) {
  50.689 -                if (strstr(aline, "end") < strstr(aline, "project gutenberg")) {
  50.690 -                    if (footerline) {
  50.691 -                        if (!nspline) /* it's an old-form header - we can detect duplicates */
  50.692 -                            printf("   --> Duplicate footer?\n");
  50.693 -                        else 
  50.694 -                            ;
  50.695 -                        }
  50.696 -                    else {
  50.697 -                        footerline = linecnt;
  50.698 -                        }
  50.699 -                    }
  50.700 -                }
  50.701 -            }
  50.702 -        if (spline) firstline = spline;
  50.703 -        if (nspline) firstline = nspline;  /* override with new */
  50.704 -
  50.705 -        if (footerline) continue;    /* 0.99+ don't count the boilerplate in the footer */
  50.706 -
  50.707 -        llen = strlen(aline);
  50.708 -        totlen += llen;
  50.709 -        for (i = 0; i < llen; i++) {
  50.710 -            if ((unsigned char)aline[i] > 127) binlen++;
  50.711 -            if (gcisalpha(aline[i])) alphalen++;
  50.712 -            if (i > 0)
  50.713 -                if (aline[i] == CHAR_DQUOTE && isalpha(aline[i-1]))
  50.714 -                    endquote_count++;
  50.715 -            }
  50.716 -        if (strlen(aline) > 2
  50.717 -            && lastlen > 2 && lastlen < SHORTEST_PG_LINE
  50.718 -            && lastblen > 2 && lastblen > SHORTEST_PG_LINE
  50.719 -            && laststart != CHAR_SPACE)
  50.720 -                shortline++;
  50.721 -
  50.722 -        if (*aline) /* fixed line below for 0.96 */
  50.723 -            if ((unsigned char)aline[strlen(aline)-1] <= CHAR_SPACE) cnt_spacend++;
  50.724 -
  50.725 -        if (strstr(aline, ".,")) dotcomma++;
  50.726 -        /* 0.98 only count ast lines for ignoring purposes where there is */
  50.727 -        /* locase text on the line */
  50.728 -        if (strstr(aline, "*")) {
  50.729 -            for (s = aline; *s; s++)
  50.730 -                if (*s >='a' && *s <= 'z')
  50.731 -                    break;
  50.732 -             if (*s) astline++;
  50.733 -             }
  50.734 -        if (strstr(aline, "/"))
  50.735 -            fslashline++;
  50.736 -        for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
  50.737 -        if (aline[i] == '-' && aline[i-1] != '-') hyphens++;
  50.738 -
  50.739 -        if (llen > LONGEST_PG_LINE) longline++;
  50.740 -        if (llen > WAY_TOO_LONG) verylongline++;
  50.741 -
  50.742 -        if (strstr(aline, "<") && strstr(aline, ">")) {
  50.743 -            i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
  50.744 -            if (i > 0) 
  50.745 -                htmcount++;
  50.746 -            if (strstr(aline, "<i>")) htmcount +=4; /* bonus marks! */
  50.747 -            }
  50.748 -
  50.749 -        /* Check for spaced em-dashes */
  50.750 -        if (strstr(aline,"--")) {
  50.751 -            emdash++;
  50.752 -            if (*(strstr(aline, "--")-1) == CHAR_SPACE ||
  50.753 -               (*(strstr(aline, "--")+2) == CHAR_SPACE))
  50.754 -                    space_emdash++;
  50.755 -            if (*(strstr(aline, "--")-1) == CHAR_SPACE &&
  50.756 -               (*(strstr(aline, "--")+2) == CHAR_SPACE))
  50.757 -                    non_PG_space_emdash++;             /* count of em-dashes with spaces both sides */
  50.758 -            if (*(strstr(aline, "--")-1) != CHAR_SPACE &&
  50.759 -               (*(strstr(aline, "--")+2) != CHAR_SPACE))
  50.760 -                    PG_space_emdash++;                 /* count of PG-type em-dashes with no spaces */
  50.761 -            }
  50.762 -
  50.763 -        for (s = aline; *s;) {
  50.764 -            s = getaword(s, inword);
  50.765 -            if (!strcmp(inword, "hij") || !strcmp(inword, "niet")) 
  50.766 -                Dutchcount++;
  50.767 -            if (!strcmp(inword, "dans") || !strcmp(inword, "avec")) 
  50.768 -                Frenchcount++;
  50.769 -            if (!strcmp(inword, "0") || !strcmp(inword, "1")) 
  50.770 -                standalone_digit++;
  50.771 -            }
  50.772 -
  50.773 -        /* Check for spaced dashes */
  50.774 -        if (strstr(aline," -"))
  50.775 -            if (*(strstr(aline, " -")+2) != '-')
  50.776 -                    spacedash++;
  50.777 -        lastblen = lastlen;
  50.778 -        lastlen = strlen(aline);
  50.779 -        laststart = aline[0];
  50.780 -
  50.781 -        }
  50.782 -    fclose(infile);
  50.783 -
  50.784 -
  50.785 -    /* now, based on this quick view, make some snap decisions */
  50.786 -    if (cnt_spacend > 0) {
  50.787 -        printf("   --> %ld lines in this file have white space at end\n", cnt_spacend);
  50.788 -        }
  50.789 -
  50.790 -    warn_dotcomma = 1;
  50.791 -    if (dotcomma > 5) {
  50.792 -        warn_dotcomma = 0;
  50.793 -        printf("   --> %ld lines in this file contain '.,'. Not reporting them.\n", dotcomma);
  50.794 -        }
  50.795 -
  50.796 -    /* if more than 50 lines, or one-tenth, are short, don't bother reporting them */
  50.797 -    warn_short = 1;
  50.798 -    if (shortline > 50 || shortline * 10 > linecnt) {
  50.799 -        warn_short = 0;
  50.800 -        printf("   --> %ld lines in this file are short. Not reporting short lines.\n", shortline);
  50.801 -        }
  50.802 -
  50.803 -    /* if more than 50 lines, or one-tenth, are long, don't bother reporting them */
  50.804 -    warn_long = 1;
  50.805 -    if (longline > 50 || longline * 10 > linecnt) {
  50.806 -        warn_long = 0;
  50.807 -        printf("   --> %ld lines in this file are long. Not reporting long lines.\n", longline);
  50.808 -        }
  50.809 -
  50.810 -    /* if more than 10 lines contain asterisks, don't bother reporting them V.0.97 */
  50.811 -    warn_ast = 1;
  50.812 -    if (astline > 10 ) {
  50.813 -        warn_ast = 0;
  50.814 -        printf("   --> %ld lines in this file contain asterisks. Not reporting them.\n", astline);
  50.815 -        }
  50.816 -
  50.817 -    /* if more than 10 lines contain forward slashes, don't bother reporting them V.0.99 */
  50.818 -    warn_fslash = 1;
  50.819 -    if (fslashline > 10 ) {
  50.820 -        warn_fslash = 0;
  50.821 -        printf("   --> %ld lines in this file contain forward slashes. Not reporting them.\n", fslashline);
  50.822 -        }
  50.823 -
  50.824 -    /* if more than 20 lines contain unpunctuated endquotes, don't bother reporting them V.0.99 */
  50.825 -    warn_endquote = 1;
  50.826 -    if (endquote_count > 20 ) {
  50.827 -        warn_endquote = 0;
  50.828 -        printf("   --> %ld lines in this file contain unpunctuated endquotes. Not reporting them.\n", endquote_count);
  50.829 -        }
  50.830 -
  50.831 -    /* if more than 10 lines contain standalone digits, don't bother reporting them V.0.97 */
  50.832 -    warn_digit = 1;
  50.833 -    if (standalone_digit > 10 ) {
  50.834 -        warn_digit = 0;
  50.835 -        printf("   --> %ld lines in this file contain standalone 0s and 1s. Not reporting them.\n", standalone_digit);
  50.836 -        }
  50.837 -
  50.838 -    /* if more than 20 lines contain hyphens at end, don't bother reporting them V.0.98 */
  50.839 -    warn_hyphen = 1;
  50.840 -    if (hyphens > 20 ) {
  50.841 -        warn_hyphen = 0;
  50.842 -        printf("   --> %ld lines in this file have hyphens at end. Not reporting them.\n", hyphens);
  50.843 -        }
  50.844 -
  50.845 -    if (htmcount > 20 && !pswit[MARKUP_SWITCH]) {
  50.846 -        printf("   --> Looks like this is HTML. Switching HTML mode ON.\n");
  50.847 -        pswit[MARKUP_SWITCH] = 1;
  50.848 -        }
  50.849 -        
  50.850 -    if (verylongline > 0) {
  50.851 -        printf("   --> %ld lines in this file are VERY long!\n", verylongline);
  50.852 -        }
  50.853 -
  50.854 -    /* If there are more non-PG spaced dashes than PG em-dashes,    */
  50.855 -    /* assume it's deliberate                                       */
  50.856 -    /* Current PG guidelines say don't use them, but older texts do,*/
  50.857 -    /* and some people insist on them whatever the guidelines say.  */
  50.858 -    /* V.20 removed requirement that PG_space_emdash be greater than*/
  50.859 -    /* ten before turning off warnings about spaced dashes.         */
  50.860 -    warn_dash = 1;
  50.861 -    if (spacedash + non_PG_space_emdash > PG_space_emdash) {
  50.862 -        warn_dash = 0;
  50.863 -        printf("   --> There are %ld spaced dashes and em-dashes. Not reporting them.\n", spacedash + non_PG_space_emdash);
  50.864 -        }
  50.865 -
  50.866 -    /* if more than a quarter of characters are hi-bit, bug out */
  50.867 -    warn_bin = 1;
  50.868 -    if (binlen * 4 > totlen) {
  50.869 -        printf("   --> This file does not appear to be ASCII. Terminating. Best of luck with it!\n");
  50.870 -        exit(1);
  50.871 -        }
  50.872 -    if (alphalen * 4 < totlen) {
  50.873 -        printf("   --> This file does not appear to be text. Terminating. Best of luck with it!\n");
  50.874 -        exit(1);
  50.875 -        }
  50.876 -    if ((binlen * 100 > totlen) || (binlen > 100)) {
  50.877 -        printf("   --> There are a lot of foreign letters here. Not reporting them.\n");
  50.878 -        warn_bin = 0;
  50.879 -        }
  50.880 -
  50.881 -    /* isDutch and isFrench added .991 Feb 06 for Frank, Jeroen, Renald */
  50.882 -    isDutch = 0;
  50.883 -    if (Dutchcount > 50) {
  50.884 -        isDutch = 1;
  50.885 -        printf("   --> This looks like Dutch - switching off dashes and warnings for 's Middags case.\n");
  50.886 -        }
  50.887 -
  50.888 -    isFrench = 0;
  50.889 -    if (Frenchcount > 50) {
  50.890 -        isFrench = 1;
  50.891 -        printf("   --> This looks like French - switching off some doublepunct.\n");
  50.892 -        }
  50.893 -
  50.894 -    if (firstline && footerline)
  50.895 -        printf("    The PG header and footer appear to be already on.\n");
  50.896 -    else {
  50.897 -        if (firstline)
  50.898 -            printf("    The PG header is on - no footer.\n");
  50.899 -        if (footerline)
  50.900 -            printf("    The PG footer is on - no header.\n");
  50.901 -        }
  50.902 -    printf("\n");
  50.903 -
  50.904 -    /* V.22 George Davis asked for an override switch to force it to list everything */
  50.905 -    if (pswit[VERBOSE_SWITCH]) {
  50.906 -        warn_bin = 1;
  50.907 -        warn_short = 1;
  50.908 -        warn_dotcomma = 1;
  50.909 -        warn_long = 1;
  50.910 -        warn_dash = 1;
  50.911 -        warn_digit = 1;
  50.912 -        warn_ast = 1;
  50.913 -        warn_fslash = 1;
  50.914 -        warn_hyphen = 1;
  50.915 -        warn_endquote = 1;
  50.916 -        printf("   *** Verbose output is ON -- you asked for it! ***\n");
  50.917 -        }
  50.918 -
  50.919 -    if (isDutch)
  50.920 -        warn_dash = 0;  /* Frank suggested turning it REALLY off for Dutch */
  50.921 -
  50.922 -    if ((infile = fopen(filename, "rb")) == NULL) {
  50.923 -        if (pswit[STDOUT_SWITCH])
  50.924 -            fprintf(stdout, "gutcheck: cannot open %s\n", filename);
  50.925 -        else
  50.926 -            fprintf(stderr, "gutcheck: cannot open %s\n", filename);
  50.927 -        exit(1);
  50.928 -        }
  50.929 -
  50.930 -    if (footerline > 0 && firstline > 0 && footerline > firstline && footerline - firstline < 100) { /* ugh */
  50.931 -        printf("   --> I don't really know where this text starts. \n");
  50.932 -        printf("       There are no reference points.\n");
  50.933 -        printf("       I'm going to have to report the header and footer as well.\n");
  50.934 -        firstline=0;
  50.935 -        }
  50.936 -        
  50.937 -
  50.938 -
  50.939 -    /*****************************************************/
  50.940 -    /*                                                   */
  50.941 -    /* Here we go with the main pass. Hold onto yer hat! */
  50.942 -    /*                                                   */
  50.943 -    /*****************************************************/
  50.944 -
  50.945 -    /* Re-init some variables we've dirtied */
  50.946 -    quot = squot = linecnt = 0;
  50.947 -    laststart = CHAR_SPACE;
  50.948 -    lastlen = lastblen = 0;
  50.949 -
  50.950 -    while (flgets(aline, LINEBUFSIZE-1, infile, linecnt+1)) {
  50.951 -        linecnt++;
  50.952 -        if (linecnt == 1) isnewpara = 1;
  50.953 -        if (pswit[DP_SWITCH])
  50.954 -            if (!strncmp(aline, "-----File: ", 11))
  50.955 -                continue;    // skip DP page separators completely
  50.956 -        if (linecnt < firstline || (footerline > 0 && linecnt > footerline)) {
  50.957 -            if (pswit[HEADER_SWITCH]) {
  50.958 -                if (!strncmp(aline, "Title:", 6))
  50.959 -                    printf("    %s\n", aline);
  50.960 -                if (!strncmp (aline, "Author:", 7))
  50.961 -                    printf("    %s\n", aline);
  50.962 -                if (!strncmp(aline, "Release Date:", 13))
  50.963 -                    printf("    %s\n", aline);
  50.964 -                if (!strncmp(aline, "Edition:", 8))
  50.965 -                    printf("    %s\n\n", aline);
  50.966 -                }
  50.967 -            continue;                /* skip through the header */
  50.968 -            }
  50.969 -        checked_linecnt++;
  50.970 -        s = aline;
  50.971 -        isemptyline = 1;      /* assume the line is empty until proven otherwise */
  50.972 -
  50.973 -        /* If we are in a state of unbalanced quotes, and this line    */
  50.974 -        /* doesn't begin with a quote, output the stored error message */
  50.975 -        /* If the -P switch was used, print the warning even if the    */
  50.976 -        /* new para starts with quotes                                 */
  50.977 -        /* Version .20 - if the new paragraph does start with a quote, */
  50.978 -        /* but is indented, I was giving a spurious error. Need to     */
  50.979 -        /* check the first _non-space_ character on the line rather    */
  50.980 -        /* than the first character when deciding whether the para     */
  50.981 -        /* starts with a quote. Using *t for this.                     */
  50.982 -        t = s;
  50.983 -        while (*t == ' ') t++;
  50.984 -        if (*dquote_err)
  50.985 -            if (*t != CHAR_DQUOTE || pswit[QPARA_SWITCH]) {
  50.986 -                if (!pswit[OVERVIEW_SWITCH]) {
  50.987 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
  50.988 -                    printf(dquote_err);
  50.989 -                    }
  50.990 -                else
  50.991 -                    cnt_dquot++;
  50.992 -            }
  50.993 -        if (*squote_err) {
  50.994 -            if (*t != CHAR_SQUOTE && *t != CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] || squot) {
  50.995 -                if (!pswit[OVERVIEW_SWITCH]) {
  50.996 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
  50.997 -                    printf(squote_err);
  50.998 -                    }
  50.999 -                else
 50.1000 -                    cnt_squot++;
 50.1001 -                }
 50.1002 -            squot = 0;
 50.1003 -            }
 50.1004 -        if (*rbrack_err) {
 50.1005 -            if (!pswit[OVERVIEW_SWITCH]) {
 50.1006 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 50.1007 -                printf(rbrack_err);
 50.1008 -                }
 50.1009 -            else
 50.1010 -                cnt_brack++;
 50.1011 -            }
 50.1012 -        if (*sbrack_err) {
 50.1013 -            if (!pswit[OVERVIEW_SWITCH]) {
 50.1014 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 50.1015 -                printf(sbrack_err);
 50.1016 -                }
 50.1017 -            else
 50.1018 -                cnt_brack++;
 50.1019 -            }
 50.1020 -        if (*cbrack_err) {
 50.1021 -            if (!pswit[OVERVIEW_SWITCH]) {
 50.1022 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 50.1023 -                printf(cbrack_err);
 50.1024 -                }
 50.1025 -            else
 50.1026 -                cnt_brack++;
 50.1027 -            }
 50.1028 -        if (*unders_err) {
 50.1029 -            if (!pswit[OVERVIEW_SWITCH]) {
 50.1030 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 50.1031 -                printf(unders_err);
 50.1032 -                }
 50.1033 -            else
 50.1034 -                cnt_brack++;
 50.1035 -            }
 50.1036 -
 50.1037 -        *dquote_err = *squote_err = *rbrack_err = *cbrack_err = 
 50.1038 -            *sbrack_err = *unders_err = 0;
 50.1039 -
 50.1040 -
 50.1041 -        /* look along the line, accumulate the count of quotes, and see */
 50.1042 -        /* if this is an empty line - i.e. a line with nothing on it    */
 50.1043 -        /* but spaces.                                                  */
 50.1044 -        /* V .12 also if line has just spaces, * and/or - on it, don't  */
 50.1045 -        /* count it, since empty lines with asterisks or dashes to      */
 50.1046 -        /* separate sections are common.                                */
 50.1047 -        /* V .15 new single-quote checking - has to be better than the  */
 50.1048 -        /* previous version, but how much better? fingers crossed!      */
 50.1049 -        /* V .20 add period to * and - as characters on a separator line*/
 50.1050 -        s = aline;
 50.1051 -        while (*s) {
 50.1052 -            if (*s == CHAR_DQUOTE) quot++;
 50.1053 -            if (*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
 50.1054 -                if (s == aline) { /* at start of line, it can only be an openquote */
 50.1055 -                    if (strncmp(s+2, "tis", 3) && strncmp(s+2, "Tis", 3)) /* hardcode a very common exception! */
 50.1056 -                        open_single_quote++;
 50.1057 -                    }
 50.1058 -                else
 50.1059 -                    if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
 50.1060 -                        ; /* do nothing! - it's definitely an apostrophe, not a quote */
 50.1061 -                    else        /* it's outside a word - let's check it out */
 50.1062 -                        if (*s == CHAR_OPEN_SQUOTE || gcisalpha(*(s+1))) { /* it damwell better BE an openquote */
 50.1063 -                            if (strncmp(s+1, "tis", 3) && strncmp(s+1, "Tis", 3)) /* hardcode a very common exception! */
 50.1064 -                                open_single_quote++;
 50.1065 -                            }
 50.1066 -                        else { /* now - is it a closequote? */
 50.1067 -                            guessquote = 0;   /* accumulate clues */
 50.1068 -                            if (gcisalpha(*(s-1))) { /* it follows a letter - could be either */
 50.1069 -                                guessquote += 1;
 50.1070 -                                if (*(s-1) == 's') { /* looks like a plural apostrophe */
 50.1071 -                                    guessquote -= 3;
 50.1072 -                                    if (*(s+1) == CHAR_SPACE)  /* bonus marks! */
 50.1073 -                                        guessquote -= 2;
 50.1074 -                                    }
 50.1075 -                                }
 50.1076 -                            else /* it doesn't have a letter either side */
 50.1077 -                                if (strchr(".?!,;:", *(s-1)) && (strchr(".?!,;: ", *(s+1))))
 50.1078 -                                    guessquote += 8; /* looks like a closequote */
 50.1079 -                                else
 50.1080 -                                    guessquote += 1;
 50.1081 -                            if (open_single_quote > close_single_quote)
 50.1082 -                                guessquote += 1; /* give it the benefit of some doubt - if a squote is already open */
 50.1083 -                            else
 50.1084 -                                guessquote -= 1;
 50.1085 -                            if (guessquote >= 0)
 50.1086 -                                close_single_quote++;
 50.1087 -                            }
 50.1088 -
 50.1089 -            if (*s != CHAR_SPACE
 50.1090 -                && *s != '-'
 50.1091 -                && *s != '.'
 50.1092 -                && *s != CHAR_ASTERISK
 50.1093 -                && *s != 13
 50.1094 -                && *s != 10) isemptyline = 0;  /* ignore lines like  *  *  *  as spacers */
 50.1095 -            if (*s == CHAR_UNDERSCORE) c_unders++;
 50.1096 -            if (*s == CHAR_OPEN_CBRACK) c_brack++;
 50.1097 -            if (*s == CHAR_CLOSE_CBRACK) c_brack--;
 50.1098 -            if (*s == CHAR_OPEN_RBRACK) r_brack++;
 50.1099 -            if (*s == CHAR_CLOSE_RBRACK) r_brack--;
 50.1100 -            if (*s == CHAR_OPEN_SBRACK) s_brack++;
 50.1101 -            if (*s == CHAR_CLOSE_SBRACK) s_brack--;
 50.1102 -            s++;
 50.1103 -            }
 50.1104 -
 50.1105 -        if (isnewpara && !isemptyline) {   /* This line is the start of a new paragraph */
 50.1106 -            start_para_line = linecnt;
 50.1107 -            strncpy(parastart, aline, 80); /* Capture its first line in case we want to report it later */
 50.1108 -            parastart[79] = 0;
 50.1109 -            dquotepar = squotepar = 0; /* restart the quote count 0.98 */
 50.1110 -            s = aline;
 50.1111 -            while (!gcisalpha(*s) && !gcisdigit(*s) && *s) s++;    /* V.97 fixed bug - overran line and gave false warning - rare */
 50.1112 -            if (*s >= 'a' && *s <='z') { /* and its first letter is lowercase */
 50.1113 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1114 -                if (!pswit[OVERVIEW_SWITCH])
 50.1115 -                    printf("    Line %ld column %d - Paragraph starts with lower-case\n", linecnt, (int)(s - aline) +1);
 50.1116 -                else
 50.1117 -                    cnt_punct++;
 50.1118 -                }
 50.1119 -            isnewpara = 0; /* Signal the end of new para processing */
 50.1120 -            }
 50.1121 -
 50.1122 -        /* Check for an em-dash broken at line end */
 50.1123 -        if (enddash && *aline == '-') {
 50.1124 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1125 -            if (!pswit[OVERVIEW_SWITCH])
 50.1126 -                printf("    Line %ld column 1 - Broken em-dash?\n", linecnt);
 50.1127 -            else
 50.1128 -                cnt_punct++;
 50.1129 -            }
 50.1130 -        enddash = 0;
 50.1131 -        for (s = aline + strlen(aline) - 1; *s == ' ' && s > aline; s--);
 50.1132 -        if (s >= aline && *s == '-')
 50.1133 -            enddash = 1;
 50.1134 -            
 50.1135 -
 50.1136 -        /* Check for invalid or questionable characters in the line */
 50.1137 -        /* Anything above 127 is invalid for plain ASCII,  and      */
 50.1138 -        /* non-printable control characters should also be flagged. */
 50.1139 -        /* Tabs should generally not be there.                      */
 50.1140 -        /* Jan 06, in 0.99: Hm. For some strange reason, I either   */
 50.1141 -        /* never created or deleted the check for unprintable       */
 50.1142 -        /* control characters. They should be reported even if      */
 50.1143 -        /* warn_bin is on, I think, and in full.                    */
 50.1144 -
 50.1145 -        for (s = aline; *s; s++) {
 50.1146 -            i = (unsigned char) *s;
 50.1147 -            if (i < CHAR_SPACE && i != CHAR_LF && i != CHAR_CR && i != CHAR_TAB) {
 50.1148 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1149 -                if (!pswit[OVERVIEW_SWITCH])
 50.1150 -                    printf("    Line %ld column %d - Control character %d\n", linecnt, (int) (s - aline) + 1, i);
 50.1151 -                else
 50.1152 -                    cnt_bin++;
 50.1153 -                }
 50.1154 -            }
 50.1155 -
 50.1156 -        if (warn_bin) {
 50.1157 -            eNon_A = eTab = eTilde = eCarat = eFSlash = eAst = 0;  /* don't repeat multiple warnings on one line */
 50.1158 -            for (s = aline; *s; s++) {
 50.1159 -                if (!eNon_A && ((*s < CHAR_SPACE && *s != 9 && *s != '\n') || (unsigned char)*s > 127)) {
 50.1160 -                    i = *s;                           /* annoying kludge for signed chars */
 50.1161 -                    if (i < 0) i += 256;
 50.1162 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1163 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1164 -                        if (i > 127 && i < 160)
 50.1165 -                            printf("    Line %ld column %d - Non-ISO-8859 character %d\n", linecnt, (int) (s - aline) + 1, i);
 50.1166 -                        else
 50.1167 -                            printf("    Line %ld column %d - Non-ASCII character %d\n", linecnt, (int) (s - aline) + 1, i);
 50.1168 -                    else
 50.1169 -                        cnt_bin++;
 50.1170 -                    eNon_A = 1;
 50.1171 -                    }
 50.1172 -                if (!eTab && *s == CHAR_TAB) {
 50.1173 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1174 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1175 -                        printf("    Line %ld column %d - Tab character?\n", linecnt, (int) (s - aline) + 1);
 50.1176 -                    else
 50.1177 -                        cnt_odd++;
 50.1178 -                    eTab = 1;
 50.1179 -                    }
 50.1180 -                if (!eTilde && *s == CHAR_TILDE) {  /* often used by OCR software to indicate an unrecognizable character */
 50.1181 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1182 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1183 -                        printf("    Line %ld column %d - Tilde character?\n", linecnt, (int) (s - aline) + 1);
 50.1184 -                    else
 50.1185 -                        cnt_odd++;
 50.1186 -                    eTilde = 1;
 50.1187 -                    }
 50.1188 -                if (!eCarat && *s == CHAR_CARAT) {  
 50.1189 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1190 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1191 -                        printf("    Line %ld column %d - Carat character?\n", linecnt, (int) (s - aline) + 1);
 50.1192 -                    else
 50.1193 -                        cnt_odd++;
 50.1194 -                    eCarat = 1;
 50.1195 -                    }
 50.1196 -                if (!eFSlash && *s == CHAR_FORESLASH && warn_fslash) {  
 50.1197 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1198 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1199 -                        printf("    Line %ld column %d - Forward slash?\n", linecnt, (int) (s - aline) + 1);
 50.1200 -                    else
 50.1201 -                        cnt_odd++;
 50.1202 -                    eFSlash = 1;
 50.1203 -                    }
 50.1204 -                /* report asterisks only in paranoid mode, since they're often deliberate */
 50.1205 -                if (!eAst && pswit[PARANOID_SWITCH] && warn_ast && !isemptyline && *s == CHAR_ASTERISK) {
 50.1206 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1207 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1208 -                        printf("    Line %ld column %d - Asterisk?\n", linecnt, (int) (s - aline) + 1);
 50.1209 -                    else
 50.1210 -                        cnt_odd++;
 50.1211 -                    eAst = 1;
 50.1212 -                    }
 50.1213 -                }
 50.1214 -            }
 50.1215 -
 50.1216 -        /* Check for line too long */
 50.1217 -        if (warn_long) {
 50.1218 -            if (strlen(aline) > LONGEST_PG_LINE) {
 50.1219 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1220 -                if (!pswit[OVERVIEW_SWITCH])
 50.1221 -                    printf("    Line %ld column %d - Long line %d\n", linecnt, strlen(aline), strlen(aline));
 50.1222 -                else
 50.1223 -                    cnt_long++;
 50.1224 -                }
 50.1225 -            }
 50.1226 -
 50.1227 -        /* Check for line too short.                                     */
 50.1228 -        /* This one is a bit trickier to implement: we don't want to     */
 50.1229 -        /* flag the last line of a paragraph for being short, so we      */
 50.1230 -        /* have to wait until we know that our current line is a         */
 50.1231 -        /* "normal" line, then report the _previous_ line if it was too  */
 50.1232 -        /* short. We also don't want to report indented lines like       */
 50.1233 -        /* chapter heads or formatted quotations. We therefore keep      */
 50.1234 -        /* lastlen as the length of the last line examined, and          */
 50.1235 -        /* lastblen as the length of the last but one, and try to        */
 50.1236 -        /* suppress unnecessary warnings by checking that both were of   */
 50.1237 -        /* "normal" length. We keep the first character of the last      */
 50.1238 -        /* line in laststart, and if it was a space, we assume that the  */
 50.1239 -        /* formatting is deliberate. I can't figure out a way to         */
 50.1240 -        /* distinguish something like a quoted verse left-aligned or     */
 50.1241 -        /* the header or footer of a letter from a paragraph of short    */
 50.1242 -        /* lines - maybe if I examined the whole paragraph, and if the   */
 50.1243 -        /* para has less than, say, 8 lines and if all lines are short,  */
 50.1244 -        /* then just assume it's OK? Need to look at some texts to see   */
 50.1245 -        /* how often a formula like this would get the right result.     */
 50.1246 -        /* V0.99 changed the tolerance for length to ignore from 2 to 1  */
 50.1247 -        if (warn_short) {
 50.1248 -            if (strlen(aline) > 1
 50.1249 -                && lastlen > 1 && lastlen < SHORTEST_PG_LINE
 50.1250 -                && lastblen > 1 && lastblen > SHORTEST_PG_LINE
 50.1251 -                && laststart != CHAR_SPACE) {
 50.1252 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
 50.1253 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1254 -                        printf("    Line %ld column %d - Short line %d?\n", linecnt-1, strlen(prevline), strlen(prevline));
 50.1255 -                    else
 50.1256 -                        cnt_short++;
 50.1257 -                    }
 50.1258 -            }
 50.1259 -        lastblen = lastlen;
 50.1260 -        lastlen = strlen(aline);
 50.1261 -        laststart = aline[0];
 50.1262 -
 50.1263 -        /* look for punctuation at start of line */
 50.1264 -        if  (*aline && strchr(".?!,;:",  aline[0]))  {            /* if it's punctuation */
 50.1265 -            if (strncmp(". . .", aline, 5)) {   /* exception for ellipsis: V.98 tightened up to except only a full ellipsis */
 50.1266 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1267 -                if (!pswit[OVERVIEW_SWITCH])
 50.1268 -                    printf("    Line %ld column 1 - Begins with punctuation?\n", linecnt);
 50.1269 -                else
 50.1270 -                    cnt_punct++;
 50.1271 -                }
 50.1272 -            }
 50.1273 -
 50.1274 -        /* Check for spaced em-dashes                            */
 50.1275 -        /* V.20 must check _all_ occurrences of "--" on the line */
 50.1276 -        /* hence the loop - even if the first double-dash is OK  */
 50.1277 -        /* there may be another that's wrong later on.           */
 50.1278 -        if (warn_dash) {
 50.1279 -            s = aline;
 50.1280 -            while (strstr(s,"--")) {
 50.1281 -                if (*(strstr(s, "--")-1) == CHAR_SPACE ||
 50.1282 -                   (*(strstr(s, "--")+2) == CHAR_SPACE)) {
 50.1283 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1284 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1285 -                        printf("    Line %ld column %d - Spaced em-dash?\n", linecnt, (int) (strstr(s,"--") - aline) + 1);
 50.1286 -                    else
 50.1287 -                        cnt_dash++;
 50.1288 -                    }
 50.1289 -                s = strstr(s,"--") + 2;
 50.1290 -                }
 50.1291 -            }
 50.1292 -
 50.1293 -        /* Check for spaced dashes */
 50.1294 -        if (warn_dash)
 50.1295 -            if (strstr(aline," -")) {
 50.1296 -                if (*(strstr(aline, " -")+2) != '-') {
 50.1297 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1298 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1299 -                        printf("    Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline," -") - aline) + 1);
 50.1300 -                    else
 50.1301 -                        cnt_dash++;
 50.1302 -                    }
 50.1303 -                }
 50.1304 -            else
 50.1305 -                if (strstr(aline,"- ")) {
 50.1306 -                    if (*(strstr(aline, "- ")-1) != '-') {
 50.1307 -                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1308 -                        if (!pswit[OVERVIEW_SWITCH])
 50.1309 -                            printf("    Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline,"- ") - aline) + 1);
 50.1310 -                        else
 50.1311 -                            cnt_dash++;
 50.1312 -                        }
 50.1313 -                    }
 50.1314 -
 50.1315 -        /* v 0.99                                                       */
 50.1316 -        /* Check for unmarked paragraphs indicated by separate speakers */
 50.1317 -        /* May well be false positive:                                  */
 50.1318 -        /* "Bravo!" "Wonderful!" called the crowd.                      */
 50.1319 -        /* but useful all the same.                                     */
 50.1320 -        s = wrk;
 50.1321 -        *s = 0;
 50.1322 -        if (strstr(aline, "\" \"")) s = strstr(aline, "\" \"");
 50.1323 -        if (strstr(aline, "\"  \"")) s = strstr(aline, "\"  \"");
 50.1324 -        if (*s) {
 50.1325 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1326 -            if (!pswit[OVERVIEW_SWITCH])
 50.1327 -                printf("    Line %ld column %d - Query missing paragraph break?\n", linecnt, (int)(s - aline) +1);
 50.1328 -            else
 50.1329 -                cnt_punct++;
 50.1330 -            }
 50.1331 -
 50.1332 -
 50.1333 -
 50.1334 -        /* Check for "to he" and other easy he/be errors          */
 50.1335 -        /* This is a very inadequate effort on the he/be problem, */
 50.1336 -        /* but the phrase "to he" is always an error, whereas "to */
 50.1337 -        /* be" is quite common. I chuckle when it does catch one! */
 50.1338 -        /* Similarly, '"Quiet!", be said.' is a non-be error      */
 50.1339 -        /* V .18 - "to he" is _not_ always an error!:             */
 50.1340 -        /*           "Where they went to he couldn't say."        */
 50.1341 -        /* but I'm leaving it in anyway.                          */
 50.1342 -        /* V .20 Another false positive:                          */
 50.1343 -        /*       What would "Cinderella" be without the . . .     */
 50.1344 -        /* and another "If he wants to he can see for himself."   */
 50.1345 -        /* V .21 Added " is be " and " be is " and " be was "     */
 50.1346 -        /* V .99 Added jeebies code -- removed again.             */
 50.1347 -        /*       Is jeebies code worth adding? Rare to see he/be  */
 50.1348 -        /*       errors with modern OCR. Separate program? Yes!   */
 50.1349 -        /*       jeebies does the job without cluttering up this. */
 50.1350 -        /*       We do get a few more queryable pairs from the    */
 50.1351 -        /*       project though -- they're cheap to implement.    */
 50.1352 -        /*       Also added a column number for guiguts.          */
 50.1353 -
 50.1354 -        s = wrk;
 50.1355 -        *s = 0;
 50.1356 -        if (strstr(aline," to he ")) s = strstr(aline," to he ");
 50.1357 -        if (strstr(aline,"\" be ")) s = strstr(aline,"\" be ");
 50.1358 -        if (strstr(aline,"\", be ")) s = strstr(aline,"\", be ");
 50.1359 -        if (strstr(aline," is be ")) s = strstr(aline," is be ");
 50.1360 -        if (strstr(aline," be is ")) s = strstr(aline," be is ");
 50.1361 -        if (strstr(aline," was be ")) s = strstr(aline," was be ");
 50.1362 -        if (strstr(aline," be would ")) s = strstr(aline," be would ");
 50.1363 -        if (strstr(aline," be could ")) s = strstr(aline," be could ");
 50.1364 -        if (*s) {
 50.1365 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1366 -            if (!pswit[OVERVIEW_SWITCH])
 50.1367 -                printf("    Line %ld column %d - Query he/be error?\n", linecnt, (int)(s - aline) +1);
 50.1368 -            else
 50.1369 -                cnt_word++;
 50.1370 -            }
 50.1371 -
 50.1372 -        s = wrk;
 50.1373 -        *s = 0;
 50.1374 -        if (strstr(aline," i bad ")) s = strstr(aline," i bad ");
 50.1375 -        if (strstr(aline," you bad ")) s = strstr(aline," you bad ");
 50.1376 -        if (strstr(aline," he bad ")) s = strstr(aline," he bad ");
 50.1377 -        if (strstr(aline," she bad ")) s = strstr(aline," she bad ");
 50.1378 -        if (strstr(aline," they bad ")) s = strstr(aline," they bad ");
 50.1379 -        if (strstr(aline," a had ")) s = strstr(aline," a had ");
 50.1380 -        if (strstr(aline," the had ")) s = strstr(aline," the had ");
 50.1381 -        if (*s) {
 50.1382 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1383 -            if (!pswit[OVERVIEW_SWITCH])
 50.1384 -                printf("    Line %ld column %d - Query had/bad error?\n", linecnt, (int)(s - aline) +1);
 50.1385 -            else
 50.1386 -                cnt_word++;
 50.1387 -            }
 50.1388 -
 50.1389 -
 50.1390 -        /* V .97 Added ", hut "  Not too common, hut pretty certain   */
 50.1391 -        /* V.99 changed to add a column number for guiguts            */
 50.1392 -        s = wrk;
 50.1393 -        *s = 0;
 50.1394 -        if (strstr(aline,", hut ")) s = strstr(aline,", hut ");
 50.1395 -        if (strstr(aline,"; hut ")) s = strstr(aline,"; hut ");
 50.1396 -        if (*s) {
 50.1397 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1398 -            if (!pswit[OVERVIEW_SWITCH])
 50.1399 -                printf("    Line %ld column %d - Query hut/but error?\n", linecnt, (int)(s - aline) +1);
 50.1400 -            else
 50.1401 -                cnt_word++;
 50.1402 -            }
 50.1403 -
 50.1404 -        /* Special case - angled bracket in front of "From" placed there by an MTA */
 50.1405 -        /* when sending an e-mail.  V .21                                          */
 50.1406 -        if (strstr(aline, ">From")) {
 50.1407 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1408 -            if (!pswit[OVERVIEW_SWITCH])
 50.1409 -                printf("    Line %ld column %d - Query angled bracket with From\n", linecnt, (int)(strstr(aline, ">From") - aline) +1);
 50.1410 -            else
 50.1411 -                cnt_punct++;
 50.1412 -            }
 50.1413 -
 50.1414 -        /* V 0.98 Check for a single character line - often an overflow from bad wrapping. */
 50.1415 -        if (*aline && !*(aline+1)) {
 50.1416 -            if (*aline == 'I' || *aline == 'V' || *aline == 'X' || *aline == 'L' || gcisdigit(*aline))
 50.1417 -                ; /* nothing - ignore numerals alone on a line. */
 50.1418 -            else {
 50.1419 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1420 -                if (!pswit[OVERVIEW_SWITCH])
 50.1421 -                    printf("    Line %ld column 1 - Query single character line\n", linecnt);
 50.1422 -                else
 50.1423 -                    cnt_punct++;
 50.1424 -                }
 50.1425 -            }
 50.1426 -
 50.1427 -        /* V 0.98 Check for I" - often should be ! */
 50.1428 -        if (strstr(aline, " I\"")) {
 50.1429 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1430 -            if (!pswit[OVERVIEW_SWITCH])
 50.1431 -                printf("    Line %ld column %ld - Query I=exclamation mark?\n", linecnt, strstr(aline, " I\"") - aline);
 50.1432 -            else
 50.1433 -                cnt_punct++;
 50.1434 -            }
 50.1435 -
 50.1436 -        /* V 0.98 Check for period without a capital letter. Cut-down from gutspell */
 50.1437 -        /*        Only works when it happens on a single line.                      */
 50.1438 -
 50.1439 -        if (pswit[PARANOID_SWITCH])
 50.1440 -            for (t = s = aline; strstr(t,". ");) {
 50.1441 -                t = strstr(t, ". ");
 50.1442 -                if (t == s)  {
 50.1443 -                    t++;
 50.1444 -                    continue; /* start of line punctuation is handled elsewhere */
 50.1445 -                    }
 50.1446 -                if (!gcisalpha(*(t-1))) {
 50.1447 -                    t++;
 50.1448 -                    continue;
 50.1449 -                    }
 50.1450 -                if (isDutch) {  /* For Frank & Jeroen -- 's Middags case */
 50.1451 -                    if (*(t+2) == CHAR_SQUOTE &&
 50.1452 -                      *(t+3)>='a' && *(t+3)<='z' &&
 50.1453 -                      *(t+4) == CHAR_SPACE &&
 50.1454 -                      *(t+5)>='A' && *(t+5)<='Z') {
 50.1455 -                        t++;
 50.1456 -                        continue;
 50.1457 -                        }
 50.1458 -                      }
 50.1459 -                s1 = t+2;
 50.1460 -                while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))
 50.1461 -                    s1++;
 50.1462 -                if (*s1 >= 'a' && *s1 <= 'z') {  /* we have something to investigate */
 50.1463 -                    istypo = 1;
 50.1464 -                    for (s1 = t - 1; s1 >= s && 
 50.1465 -                        (gcisalpha(*s1) || gcisdigit(*s1) || 
 50.1466 -                        (*s1 == CHAR_SQUOTE && gcisalpha(*(s1+1)) && gcisalpha(*(s1-1)))); s1--); /* so let's go back and find out */
 50.1467 -                    s1++;
 50.1468 -                    for (i = 0; *s1 && *s1 != '.'; s1++, i++)
 50.1469 -                        testword[i] = *s1;
 50.1470 -                    testword[i] = 0;
 50.1471 -                    for (i = 0; *abbrev[i]; i++)
 50.1472 -                        if (!strcmp(testword, abbrev[i]))
 50.1473 -                            istypo = 0;
 50.1474 -//                    if (*testword >= 'A' && *testword <= 'Z') 
 50.1475 -//                        istypo = 0;
 50.1476 -                    if (gcisdigit(*testword)) istypo = 0;
 50.1477 -                    if (!*(testword+1)) istypo = 0;
 50.1478 -                    if (isroman(testword)) istypo = 0;
 50.1479 -                    if (istypo) {
 50.1480 -                        istypo = 0;
 50.1481 -                        for (i = 0; testword[i]; i++)
 50.1482 -                            if (strchr(vowels, testword[i]))
 50.1483 -                                istypo = 1;
 50.1484 -                        }
 50.1485 -                    if (istypo) {
 50.1486 -                        isdup = 0;
 50.1487 -                        if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
 50.1488 -                            for (i = 0; i < qperiod_index; i++)
 50.1489 -                                if (!strcmp(testword, qperiod[i])) {
 50.1490 -                                    isdup = 1;
 50.1491 -                                    }
 50.1492 -                        if (!isdup) {
 50.1493 -                            if (qperiod_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
 50.1494 -                                strcpy(qperiod[qperiod_index], testword);
 50.1495 -                                qperiod_index++;
 50.1496 -                                }
 50.1497 -                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1498 -                            if (!pswit[OVERVIEW_SWITCH])
 50.1499 -                                printf("    Line %ld column %d - Extra period?\n", linecnt, (int)(t - aline)+1);
 50.1500 -                            else
 50.1501 -                                cnt_punct++;
 50.1502 -                            }
 50.1503 -                        }
 50.1504 -                    }
 50.1505 -                t++;
 50.1506 -                }
 50.1507 -
 50.1508 -
 50.1509 -        if (pswit[TYPO_SWITCH]) {    /* Should have put this condition in at the start of 0.99. Duh! */
 50.1510 -            /* Check for words usually not followed by punctuation 0.99 */
 50.1511 -            for (s = aline; *s;) {
 50.1512 -                wordstart = s;
 50.1513 -                s = getaword(s, inword);
 50.1514 -                if (!*inword) continue;
 50.1515 -                lowerit(inword);
 50.1516 -                for (i = 0; *nocomma[i]; i++)
 50.1517 -                    if (!strcmp(inword, nocomma[i])) {
 50.1518 -                        if (*s == ',' || *s == ';' || *s == ':') {
 50.1519 -                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1520 -                            if (!pswit[OVERVIEW_SWITCH])
 50.1521 -                                printf("    Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
 50.1522 -                            else
 50.1523 -                                cnt_punct++;
 50.1524 -                            }
 50.1525 -                        }
 50.1526 -                for (i = 0; *noperiod[i]; i++)
 50.1527 -                    if (!strcmp(inword, noperiod[i])) {
 50.1528 -                        if (*s == '.' || *s == '!') {
 50.1529 -                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1530 -                            if (!pswit[OVERVIEW_SWITCH])
 50.1531 -                                printf("    Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
 50.1532 -                            else
 50.1533 -                                cnt_punct++;
 50.1534 -                            }
 50.1535 -                        }
 50.1536 -                }
 50.1537 -            }
 50.1538 -
 50.1539 -
 50.1540 -
 50.1541 -        /* Check for commonly mistyped words, and digits like 0 for O in a word */
 50.1542 -        for (s = aline; *s;) {
 50.1543 -            wordstart = s;
 50.1544 -            s = getaword(s, inword);
 50.1545 -            if (!*inword) continue; /* don't bother with empty lines */
 50.1546 -            if (mixdigit(inword)) {
 50.1547 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1548 -                if (!pswit[OVERVIEW_SWITCH])
 50.1549 -                    printf("    Line %ld column %ld - Query digit in %s\n", linecnt, (int)(wordstart - aline) + 1, inword);
 50.1550 -                else
 50.1551 -                    cnt_word++;
 50.1552 -                }
 50.1553 -
 50.1554 -            /* put the word through a series of tests for likely typos and OCR errors */
 50.1555 -            /* V.21 I had allowed lots of typo-checking even with the typo switch     */
 50.1556 -            /* turned off, but I really should disallow reporting of them when        */
 50.1557 -            /* the switch is off. Hence the "if" below.                               */
 50.1558 -            if (pswit[TYPO_SWITCH]) {
 50.1559 -                istypo = 0;
 50.1560 -                strcpy(testword, inword);
 50.1561 -                alower = 0;
 50.1562 -                for (i = 0; i < (signed int)strlen(testword); i++) { /* lowercase for testing */
 50.1563 -                    if (testword[i] >= 'a' && testword[i] <= 'z') alower = 1;
 50.1564 -                    if (alower && testword[i] >= 'A' && testword[i] <= 'Z') {
 50.1565 -                        /* we have an uppercase mid-word. However, there are common cases: */
 50.1566 -                        /*   Mac and Mc like McGill                                        */
 50.1567 -                        /*   French contractions like l'Abbe                               */
 50.1568 -                        if ((i == 2 && testword[0] == 'm' && testword[1] == 'c') ||
 50.1569 -                            (i == 3 && testword[0] == 'm' && testword[1] == 'a' && testword[2] == 'c') ||
 50.1570 -                            (i > 0 && testword[i-1] == CHAR_SQUOTE))
 50.1571 -                                ; /* do nothing! */
 50.1572 -
 50.1573 -                        else {  /* V.97 - remove separate case of uppercase within word so that         */
 50.1574 -                                /* names like VanAllen fall into qword_index and get reported only once */
 50.1575 -                            istypo = 1;
 50.1576 -                            }
 50.1577 -                        }
 50.1578 -                    testword[i] = (char)tolower(testword[i]);
 50.1579 -                    }
 50.1580 -
 50.1581 -                /* check for certain unlikely two-letter combinations at word start and end */
 50.1582 -                /* V.0.97 - this replaces individual hardcoded checks in previous versions */
 50.1583 -                if (strlen(testword) > 1) {
 50.1584 -                    for (i = 0; *nostart[i]; i++)
 50.1585 -                        if (!strncmp(testword, nostart[i], 2))
 50.1586 -                            istypo = 1;
 50.1587 -                    for (i = 0; *noend[i]; i++)
 50.1588 -                        if (!strncmp(testword + strlen(testword) -2, noend[i], 2))
 50.1589 -                            istypo = 1;
 50.1590 -                    }
 50.1591 -
 50.1592 -
 50.1593 -                /* ght is common, gbt never. Like that. */
 50.1594 -                if (strstr(testword, "cb")) istypo = 1;
 50.1595 -                if (strstr(testword, "gbt")) istypo = 1;
 50.1596 -                if (strstr(testword, "pbt")) istypo = 1;
 50.1597 -                if (strstr(testword, "tbs")) istypo = 1;
 50.1598 -                if (strstr(testword, "mrn")) istypo = 1;
 50.1599 -                if (strstr(testword, "ahle")) istypo = 1;
 50.1600 -                if (strstr(testword, "ihle")) istypo = 1;
 50.1601 -
 50.1602 -                /* "TBE" does happen - like HEARTBEAT - but uncommon.                    */
 50.1603 -                /*  Also "TBI" - frostbite, outbid - but uncommon.                       */
 50.1604 -                /*  Similarly "ii" like Hawaii, or Pompeii, and in Roman numerals,       */
 50.1605 -                /*  but these are covered in V.20. "ii" is a common scanno.              */
 50.1606 -                if (strstr(testword, "tbi")) istypo = 1;
 50.1607 -                if (strstr(testword, "tbe")) istypo = 1;
 50.1608 -                if (strstr(testword, "ii")) istypo = 1;
 50.1609 -
 50.1610 -                /* check for no vowels or no consonants. */
 50.1611 -                /* If none, flag a typo                  */
 50.1612 -                if (!istypo && strlen(testword)>1) {
 50.1613 -                    vowel = consonant = 0;
 50.1614 -                    for (i = 0; testword[i]; i++)
 50.1615 -                        if (testword[i] == 'y' || gcisdigit(testword[i])) {  /* Yah, this is loose. */
 50.1616 -                            vowel++;
 50.1617 -                            consonant++;
 50.1618 -                            }
 50.1619 -                        else
 50.1620 -                            if  (strchr(vowels, testword[i])) vowel++;
 50.1621 -                            else consonant++;
 50.1622 -                    if (!vowel || !consonant) {
 50.1623 -                        istypo = 1;
 50.1624 -                        }
 50.1625 -                    }
 50.1626 -
 50.1627 -                /* now exclude the word from being reported if it's in */
 50.1628 -                /* the okword list                                     */
 50.1629 -                for (i = 0; *okword[i]; i++)
 50.1630 -                    if (!strcmp(testword, okword[i]))
 50.1631 -                        istypo = 0;
 50.1632 -
 50.1633 -                /* what looks like a typo may be a Roman numeral. Exclude these */
 50.1634 -                if (istypo)
 50.1635 -                    if (isroman(testword))
 50.1636 -                        istypo = 0;
 50.1637 -
 50.1638 -                /* check the manual list of typos */
 50.1639 -                if (!istypo)
 50.1640 -                    for (i = 0; *typo[i]; i++)
 50.1641 -                        if (!strcmp(testword, typo[i]))
 50.1642 -                            istypo = 1;
 50.1643 -
 50.1644 -
 50.1645 -                /* V.21 - check lowercase s and l - special cases */
 50.1646 -                /* V.98 - added "i" and "m"                       */
 50.1647 -                /* V.99 - added "j" often a semi-colon gone wrong */
 50.1648 -                /*      - and "d" for a missing apostrophe - he d */
 50.1649 -                /*      - and "n" for "in"                        */
 50.1650 -                if (!istypo && strlen(testword) == 1)
 50.1651 -                    if (strchr("slmijdn", *inword))
 50.1652 -                        istypo = 1;
 50.1653 -
 50.1654 -
 50.1655 -                if (istypo) {
 50.1656 -                    isdup = 0;
 50.1657 -                    if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
 50.1658 -                        for (i = 0; i < qword_index; i++)
 50.1659 -                            if (!strcmp(testword, qword[i])) {
 50.1660 -                                isdup = 1;
 50.1661 -                                ++dupcnt[i];
 50.1662 -                                }
 50.1663 -                    if (!isdup) {
 50.1664 -                        if (qword_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
 50.1665 -                            strcpy(qword[qword_index], testword);
 50.1666 -                            qword_index++;
 50.1667 -                            }
 50.1668 -                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1669 -                        if (!pswit[OVERVIEW_SWITCH]) {
 50.1670 -                            printf("    Line %ld column %d - Query word %s", linecnt, (int)(wordstart - aline) + 1, inword);
 50.1671 -                            if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
 50.1672 -                                printf(" - not reporting duplicates");
 50.1673 -                            printf("\n");
 50.1674 -                            }
 50.1675 -                        else
 50.1676 -                            cnt_word++;
 50.1677 -                        }
 50.1678 -                    }
 50.1679 -                }        /* end of typo-checking */
 50.1680 -
 50.1681 -                /* check the user's list of typos */
 50.1682 -                if (!istypo)
 50.1683 -                    if (usertypo_count)
 50.1684 -                        for (i = 0; i < usertypo_count; i++)
 50.1685 -                            if (!strcmp(testword, usertypo[i])) {
 50.1686 -                                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1687 -                                if (!pswit[OVERVIEW_SWITCH])  
 50.1688 -                                    printf("    Line %ld column %d - Query possible scanno %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
 50.1689 -                                }
 50.1690 -
 50.1691 -
 50.1692 -
 50.1693 -            if (pswit[PARANOID_SWITCH] && warn_digit) {   /* in paranoid mode, query all 0 and 1 standing alone - added warn_digit V.97*/
 50.1694 -                if (!strcmp(inword, "0") || !strcmp(inword, "1")) {
 50.1695 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1696 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1697 -                        printf("    Line %ld column %d - Query standalone %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
 50.1698 -                    else
 50.1699 -                        cnt_word++;
 50.1700 -                    }
 50.1701 -                }
 50.1702 -            }
 50.1703 -
 50.1704 -        /* look for added or missing spaces around punctuation and quotes */
 50.1705 -        /* If there is a punctuation character like ! with no space on    */
 50.1706 -        /* either side, suspect a missing!space. If there are spaces on   */
 50.1707 -        /* both sides , assume a typo. If we see a double quote with no   */
 50.1708 -        /* space or punctuation on either side of it, assume unspaced     */
 50.1709 -        /* quotes "like"this.                                             */
 50.1710 -        llen = strlen(aline);
 50.1711 -        for (i = 1; i < llen; i++) {                               /* for each character in the line after the first */
 50.1712 -            if  (strchr(".?!,;:_", aline[i])) {                    /* if it's punctuation */
 50.1713 -                isacro = 0;                       /* we need to suppress warnings for acronyms like M.D. */
 50.1714 -                isellipsis = 0;                   /* we need to suppress warnings for ellipsis . . . */
 50.1715 -                if ( (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) ||     /* if there are letters on both sides of it or ... */
 50.1716 -                   (gcisalpha(aline[i+1]) && strchr("?!,;:", aline[i]))) { /* ...if it's strict punctuation followed by an alpha */
 50.1717 -                    if (aline[i] == '.') {
 50.1718 -                        if (i > 2)
 50.1719 -                            if (aline[i-2] == '.') isacro = 1;
 50.1720 -                        if (i + 2 < llen)
 50.1721 -                            if (aline[i+2] == '.') isacro = 1;
 50.1722 -                        }
 50.1723 -                    if (!isacro) {
 50.1724 -                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1725 -                        if (!pswit[OVERVIEW_SWITCH])
 50.1726 -                            printf("    Line %ld column %d - Missing space?\n", linecnt, i+1);
 50.1727 -                        else
 50.1728 -                            cnt_punct++;
 50.1729 -                        }
 50.1730 -                    }
 50.1731 -                if (aline[i-1] == CHAR_SPACE && (aline[i+1] == CHAR_SPACE || aline[i+1] == 0)) { /* if there are spaces on both sides, or space before and end of line */
 50.1732 -                    if (aline[i] == '.') {
 50.1733 -                        if (i > 2)
 50.1734 -                            if (aline[i-2] == '.') isellipsis = 1;
 50.1735 -                        if (i + 2 < llen)
 50.1736 -                            if (aline[i+2] == '.') isellipsis = 1;
 50.1737 -                        }
 50.1738 -                    if (!isemptyline && !isellipsis) {
 50.1739 -                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1740 -                        if (!pswit[OVERVIEW_SWITCH])
 50.1741 -                            printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
 50.1742 -                        else
 50.1743 -                            cnt_punct++;
 50.1744 -                        }
 50.1745 -                    }
 50.1746 -                }
 50.1747 -            }
 50.1748 -
 50.1749 -        /* 0.98 -- split out the characters that CANNOT be preceded by space */
 50.1750 -        llen = strlen(aline);
 50.1751 -        for (i = 1; i < llen; i++) {                             /* for each character in the line after the first */
 50.1752 -            if  (strchr("?!,;:", aline[i])) {                    /* if it's punctuation that _cannot_ have a space before it */
 50.1753 -                if (aline[i-1] == CHAR_SPACE && !isemptyline && aline[i+1] != CHAR_SPACE) { /* if aline[i+1) DOES == space, it was already reported just above */
 50.1754 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1755 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1756 -                        printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
 50.1757 -                    else
 50.1758 -                        cnt_punct++;
 50.1759 -                    }
 50.1760 -                }
 50.1761 -            }
 50.1762 -
 50.1763 -
 50.1764 -        /* 0.99 -- special case " .X" where X is any alpha. */
 50.1765 -        /* This plugs a hole in the acronym code above. Inelegant, but maintainable. */
 50.1766 -        llen = strlen(aline);
 50.1767 -        for (i = 1; i < llen; i++) {             /* for each character in the line after the first */
 50.1768 -            if  (aline[i] == '.') {              /* if it's a period */
 50.1769 -                if (aline[i-1] == CHAR_SPACE && gcisalpha(aline[i+1])) { /* if the period follows a space and is followed by a letter */
 50.1770 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1771 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1772 -                        printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
 50.1773 -                    else
 50.1774 -                        cnt_punct++;
 50.1775 -                    }
 50.1776 -                }
 50.1777 -            }
 50.1778 -
 50.1779 -
 50.1780 -
 50.1781 -
 50.1782 -        /* v.21 breaking out the search for unspaced doublequotes        */
 50.1783 -        /* This is not as efficient, but it's more maintainable          */
 50.1784 -        /* V.97 added underscore to the list of characters not to query, */
 50.1785 -        /* since underscores are commonly used as italics indicators.    */
 50.1786 -        /* V.98 Added slash as well, same reason.                        */
 50.1787 -        for (i = 1; i < llen; i++) {                               /* for each character in the line after the first */
 50.1788 -            if (aline[i] == CHAR_DQUOTE) {
 50.1789 -                if ((!strchr(" _-.'`,;:!/([{?}])",  aline[i-1]) &&
 50.1790 -                     !strchr(" _-.'`,;:!/([{?}])",  aline[i+1]) &&
 50.1791 -                     aline[i+1] != 0
 50.1792 -                     || (!strchr(" _-([{'`", aline[i-1]) && gcisalpha(aline[i+1])))) {
 50.1793 -                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1794 -                        if (!pswit[OVERVIEW_SWITCH])
 50.1795 -                            printf("    Line %ld column %d - Unspaced quotes?\n", linecnt, i+1);
 50.1796 -                        else
 50.1797 -                            cnt_punct++;
 50.1798 -                        }
 50.1799 -                }
 50.1800 -            }
 50.1801 -
 50.1802 -
 50.1803 -        /* v.98 check parity of quotes                             */
 50.1804 -        /* v.99 added !*(s+1) in some tests to catch "I am," he said, but I will not be soon". */
 50.1805 -        for (s = aline; *s; s++) {
 50.1806 -            if (*s == CHAR_DQUOTE) {
 50.1807 -                if (!(dquotepar = !dquotepar)) {    /* parity even */
 50.1808 -                    if (!strchr("_-.'`/,;:!?)]} ",  *(s+1))) {
 50.1809 -                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1810 -                        if (!pswit[OVERVIEW_SWITCH])
 50.1811 -                            printf("    Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
 50.1812 -                        else
 50.1813 -                            cnt_punct++;
 50.1814 -                        }
 50.1815 -                    }
 50.1816 -                else {                              /* parity odd */
 50.1817 -                    if (!gcisalpha(*(s+1)) && !isdigit(*(s+1)) && !strchr("_-/.'`([{$",  *(s+1)) || !*(s+1)) {
 50.1818 -                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1819 -                        if (!pswit[OVERVIEW_SWITCH])
 50.1820 -                            printf("    Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
 50.1821 -                        else
 50.1822 -                            cnt_punct++;
 50.1823 -                        }
 50.1824 -                    }
 50.1825 -                }
 50.1826 -            }
 50.1827 -
 50.1828 -            if (*aline == CHAR_DQUOTE) {
 50.1829 -                if (strchr(",;:!?)]} ", aline[1])) {
 50.1830 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1831 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1832 -                        printf("    Line %ld column 1 - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
 50.1833 -                    else
 50.1834 -                        cnt_punct++;
 50.1835 -                    }
 50.1836 -                }
 50.1837 -
 50.1838 -        if (pswit[SQUOTE_SWITCH])
 50.1839 -            for (s = aline; *s; s++) {
 50.1840 -                if ((*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
 50.1841 -                     && ( s == aline || (s > aline && !gcisalpha(*(s-1))) || !gcisalpha(*(s+1)))) {
 50.1842 -                    if (!(squotepar = !squotepar)) {    /* parity even */
 50.1843 -                        if (!strchr("_-.'`/\",;:!?)]} ",  *(s+1))) {
 50.1844 -                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1845 -                            if (!pswit[OVERVIEW_SWITCH])
 50.1846 -                                printf("    Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
 50.1847 -                            else
 50.1848 -                                cnt_punct++;
 50.1849 -                            }
 50.1850 -                        }
 50.1851 -                    else {                              /* parity odd */
 50.1852 -                        if (!gcisalpha(*(s+1)) && !isdigit(*(s+1)) && !strchr("_-/\".'`",  *(s+1)) || !*(s+1)) {
 50.1853 -                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1854 -                            if (!pswit[OVERVIEW_SWITCH])
 50.1855 -                                printf("    Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
 50.1856 -                            else
 50.1857 -                                cnt_punct++;
 50.1858 -                            }
 50.1859 -                        }
 50.1860 -                    }
 50.1861 -                }
 50.1862 -                    
 50.1863 -
 50.1864 -        /* v.20 also look for double punctuation like ,. or ,,     */
 50.1865 -        /* Thanks to DW for the suggestion!                        */
 50.1866 -        /* I'm putting this in a separate loop for clarity         */
 50.1867 -        /* In books with references, ".," and ".;" are common      */
 50.1868 -        /* e.g. "etc., etc.," and vol. 1.; vol 3.;                 */
 50.1869 -        /* OTOH, from my initial tests, there are also fairly      */
 50.1870 -        /* common errors. What to do? Make these cases paranoid?   */
 50.1871 -        /* V.21 ".," is the most common, so invented warn_dotcomma */
 50.1872 -        /* to suppress detailed reporting if it occurs often       */
 50.1873 -        llen = strlen(aline);
 50.1874 -        for (i = 0; i < llen; i++)                  /* for each character in the line */
 50.1875 -            if (strchr(".?!,;:", aline[i])          /* if it's punctuation */
 50.1876 -            && (strchr(".?!,;:", aline[i+1]))
 50.1877 -            && aline[i] && aline[i+1])      /* followed by punctuation, it's a query, unless . . . */
 50.1878 -                if (
 50.1879 -                  (aline[i] == aline[i+1]
 50.1880 -                  && (aline[i] == '.' || aline[i] == '?' || aline[i] == '!'))
 50.1881 -                  || (!warn_dotcomma && aline[i] == '.' && aline[i+1] == ',')
 50.1882 -                  || (isFrench && !strncmp(aline+i, ",...", 4))
 50.1883 -                  || (isFrench && !strncmp(aline+i, "...,", 4))
 50.1884 -                  || (isFrench && !strncmp(aline+i, ";...", 4))
 50.1885 -                  || (isFrench && !strncmp(aline+i, "...;", 4))
 50.1886 -                  || (isFrench && !strncmp(aline+i, ":...", 4))
 50.1887 -                  || (isFrench && !strncmp(aline+i, "...:", 4))
 50.1888 -                  || (isFrench && !strncmp(aline+i, "!...", 4))
 50.1889 -                  || (isFrench && !strncmp(aline+i, "...!", 4))
 50.1890 -                  || (isFrench && !strncmp(aline+i, "?...", 4))
 50.1891 -                  || (isFrench && !strncmp(aline+i, "...?", 4))
 50.1892 -                ) {
 50.1893 -                if ((isFrench && !strncmp(aline+i, ",...", 4))    /* could this BE any more awkward? */
 50.1894 -                  || (isFrench && !strncmp(aline+i, "...,", 4))
 50.1895 -                  || (isFrench && !strncmp(aline+i, ";...", 4))
 50.1896 -                  || (isFrench && !strncmp(aline+i, "...;", 4))
 50.1897 -                  || (isFrench && !strncmp(aline+i, ":...", 4))
 50.1898 -                  || (isFrench && !strncmp(aline+i, "...:", 4))
 50.1899 -                  || (isFrench && !strncmp(aline+i, "!...", 4))
 50.1900 -                  || (isFrench && !strncmp(aline+i, "...!", 4))
 50.1901 -                  || (isFrench && !strncmp(aline+i, "?...", 4))
 50.1902 -                  || (isFrench && !strncmp(aline+i, "...?", 4)))
 50.1903 -                    i +=4;
 50.1904 -                        ; /* do nothing for .. !! and ?? which can be legit */
 50.1905 -                    }
 50.1906 -                else {
 50.1907 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1908 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1909 -                        printf("    Line %ld column %d - Double punctuation?\n", linecnt, i+1);
 50.1910 -                    else
 50.1911 -                        cnt_punct++;
 50.1912 -                    }
 50.1913 -
 50.1914 -        /* v.21 breaking out the search for spaced doublequotes */
 50.1915 -        /* This is not as efficient, but it's more maintainable */
 50.1916 -        s = aline;
 50.1917 -        while (strstr(s," \" ")) {
 50.1918 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1919 -            if (!pswit[OVERVIEW_SWITCH])
 50.1920 -                printf("    Line %ld column %d - Spaced doublequote?\n", linecnt, (int)(strstr(s," \" ")-aline+1));
 50.1921 -            else
 50.1922 -                cnt_punct++;
 50.1923 -            s = strstr(s," \" ") + 2;
 50.1924 -            }
 50.1925 -
 50.1926 -        /* v.20 also look for spaced singlequotes ' and `  */
 50.1927 -        s = aline;
 50.1928 -        while (strstr(s," ' ")) {
 50.1929 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1930 -            if (!pswit[OVERVIEW_SWITCH])
 50.1931 -                printf("    Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ' ")-aline+1));
 50.1932 -            else
 50.1933 -                cnt_punct++;
 50.1934 -            s = strstr(s," ' ") + 2;
 50.1935 -            }
 50.1936 -
 50.1937 -        s = aline;
 50.1938 -        while (strstr(s," ` ")) {
 50.1939 -            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1940 -            if (!pswit[OVERVIEW_SWITCH])
 50.1941 -                printf("    Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ` ")-aline+1));
 50.1942 -            else
 50.1943 -                cnt_punct++;
 50.1944 -            s = strstr(s," ` ") + 2;
 50.1945 -            }
 50.1946 -
 50.1947 -        /* v.99 check special case of 'S instead of 's at end of word */
 50.1948 -        s = aline + 1;
 50.1949 -        while (*s) {
 50.1950 -            if (*s == CHAR_SQUOTE && *(s+1) == 'S' && *(s-1)>='a' && *(s-1)<='z')  {
 50.1951 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1952 -                if (!pswit[OVERVIEW_SWITCH])
 50.1953 -                    printf("    Line %ld column %d - Capital \"S\"?\n", linecnt, (int)(s-aline+2));
 50.1954 -                else
 50.1955 -                    cnt_punct++;
 50.1956 -                }
 50.1957 -            s++;
 50.1958 -            }
 50.1959 -
 50.1960 -
 50.1961 -        /* v.21 Now check special cases - start and end of line - */
 50.1962 -        /* for single and double quotes. Start is sometimes [sic] */
 50.1963 -        /* but better to query it anyway.                         */
 50.1964 -        /* While I'm here, check for dash at end of line          */
 50.1965 -        llen = strlen(aline);
 50.1966 -        if (llen > 1) {
 50.1967 -            if (aline[llen-1] == CHAR_DQUOTE ||
 50.1968 -                aline[llen-1] == CHAR_SQUOTE ||
 50.1969 -                aline[llen-1] == CHAR_OPEN_SQUOTE)
 50.1970 -                if (aline[llen-2] == CHAR_SPACE) {
 50.1971 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1972 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1973 -                        printf("    Line %ld column %d - Spaced quote?\n", linecnt, llen);
 50.1974 -                    else
 50.1975 -                        cnt_punct++;
 50.1976 -                    }
 50.1977 -            
 50.1978 -            /* V 0.98 removed aline[0] == CHAR_DQUOTE from the test below, since */
 50.1979 -            /* Wrongspaced quotes test also catches it for "                     */
 50.1980 -            if (aline[0] == CHAR_SQUOTE ||
 50.1981 -                aline[0] == CHAR_OPEN_SQUOTE)
 50.1982 -                if (aline[1] == CHAR_SPACE) {
 50.1983 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1984 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1985 -                        printf("    Line %ld column 1 - Spaced quote?\n", linecnt);
 50.1986 -                    else
 50.1987 -                        cnt_punct++;
 50.1988 -                    }
 50.1989 -            /* dash at end of line may well be legit - paranoid mode only */
 50.1990 -            /* and don't report em-dash at line-end                       */
 50.1991 -            if (pswit[PARANOID_SWITCH] && warn_hyphen) {
 50.1992 -                for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
 50.1993 -                if (aline[i] == '-' && aline[i-1] != '-') {
 50.1994 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.1995 -                    if (!pswit[OVERVIEW_SWITCH])
 50.1996 -                        printf("    Line %ld column %d - Hyphen at end of line?\n", linecnt, i);
 50.1997 -                    }
 50.1998 -                }
 50.1999 -            }
 50.2000 -
 50.2001 -        /* v.21 also look for brackets surrounded by alpha                    */
 50.2002 -        /* Brackets are often unspaced, but shouldn't be surrounded by alpha. */
 50.2003 -        /* If so, suspect a scanno like "a]most"                              */
 50.2004 -        llen = strlen(aline);
 50.2005 -        for (i = 1; i < llen-1; i++) {           /* for each character in the line except 1st & last*/
 50.2006 -            if (strchr("{[()]}", aline[i])         /* if it's a bracket */
 50.2007 -                && gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) {
 50.2008 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.2009 -                if (!pswit[OVERVIEW_SWITCH])
 50.2010 -                    printf("    Line %ld column %d - Unspaced bracket?\n", linecnt, i);
 50.2011 -                else
 50.2012 -                    cnt_punct++;
 50.2013 -                }
 50.2014 -            }
 50.2015 -        /* The "Cinderella" case, back in again! :-S Give it another shot */
 50.2016 -        if (warn_endquote) {
 50.2017 -            llen = strlen(aline);
 50.2018 -            for (i = 1; i < llen; i++) {           /* for each character in the line except 1st */
 50.2019 -                if (aline[i] == CHAR_DQUOTE)
 50.2020 -                    if (isalpha(aline[i-1])) {
 50.2021 -                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.2022 -                        if (!pswit[OVERVIEW_SWITCH])
 50.2023 -                            printf("    Line %ld column %d - endquote missing punctuation?\n", linecnt, i);
 50.2024 -                        else
 50.2025 -                            cnt_punct++;
 50.2026 -                        }
 50.2027 -                }
 50.2028 -            }
 50.2029 -
 50.2030 -        llen = strlen(aline);
 50.2031 -
 50.2032 -        /* Check for <HTML TAG> */
 50.2033 -        /* If there is a < in the line, followed at some point  */
 50.2034 -        /* by a > then we suspect HTML                          */
 50.2035 -        if (strstr(aline, "<") && strstr(aline, ">")) {
 50.2036 -            i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
 50.2037 -            if (i > 0) {
 50.2038 -                strncpy(wrk, strstr(aline, "<"), i);
 50.2039 -                wrk[i] = 0;
 50.2040 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.2041 -                if (!pswit[OVERVIEW_SWITCH])
 50.2042 -                    printf("    Line %ld column %d - HTML Tag? %s \n", linecnt, (int)(strstr(aline, "<") - aline) + 1, wrk);
 50.2043 -                else
 50.2044 -                    cnt_html++;
 50.2045 -                }
 50.2046 -            }
 50.2047 -
 50.2048 -        /* Check for &symbol; HTML                   */
 50.2049 -        /* If there is a & in the line, followed at  */
 50.2050 -        /* some point by a ; then we suspect HTML    */
 50.2051 -        if (strstr(aline, "&") && strstr(aline, ";")) {
 50.2052 -            i = (int)(strstr(aline, ";") - strstr(aline, "&") + 1);
 50.2053 -            for (s = strstr(aline, "&"); s < strstr(aline, ";"); s++)   
 50.2054 -                if (*s == CHAR_SPACE) i = 0;                /* 0.99 don't report "Jones & Son;" */
 50.2055 -            if (i > 0) {
 50.2056 -                strncpy(wrk, strstr(aline,"&"), i);
 50.2057 -                wrk[i] = 0;
 50.2058 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 50.2059 -                if (!pswit[OVERVIEW_SWITCH])
 50.2060 -                    printf("    Line %ld column %d - HTML symbol? %s \n", linecnt, (int)(strstr(aline, "&") - aline) + 1, wrk);
 50.2061 -                else
 50.2062 -                    cnt_html++;
 50.2063 -                }
 50.2064 -            }
 50.2065 -
 50.2066 -        /* At end of paragraph, check for mismatched quotes.           */
 50.2067 -        /* We don't want to report an error immediately, since it is a */
 50.2068 -        /* common convention to omit the quotes at end of paragraph if */
 50.2069 -        /* the next paragraph is a continuation of the same speaker.   */
 50.2070 -        /* Where this is the case, the next para should begin with a   */
 50.2071 -        /* quote, so we store the warning message and only display it  */
 50.2072 -        /* at the top of the next iteration if the new para doesn't    */
 50.2073 -        /* start with a quote.                                         */
 50.2074 -        /* The -p switch overrides this default, and warns of unclosed */
 50.2075 -        /* quotes on _every_ paragraph, whether the next begins with a */
 50.2076 -        /* quote or not.                                               */
 50.2077 -        /* Version .16 - only report mismatched single quotes if       */
 50.2078 -        /* an open_single_quotes was found.                            */
 50.2079 -
 50.2080 -        if (isemptyline) {          /* end of para - add up the totals */
 50.2081 -            if (quot % 2)
 50.2082 -                sprintf(dquote_err, "    Line %ld - Mismatched quotes\n", linecnt);
 50.2083 -            if (pswit[SQUOTE_SWITCH] && open_single_quote && (open_single_quote != close_single_quote) )
 50.2084 -                sprintf(squote_err,"    Line %ld - Mismatched singlequotes?\n", linecnt);
 50.2085 -            if (pswit[SQUOTE_SWITCH] && open_single_quote
 50.2086 -                                     && (open_single_quote != close_single_quote)
 50.2087 -                                     && (open_single_quote != close_single_quote +1) )
 50.2088 -                squot = 1;    /* flag it to be noted regardless of the first char of the next para */
 50.2089 -            if (r_brack)
 50.2090 -                sprintf(rbrack_err, "    Line %ld - Mismatched round brackets?\n", linecnt);
 50.2091 -            if (s_brack)
 50.2092 -                sprintf(sbrack_err, "    Line %ld - Mismatched square brackets?\n", linecnt);
 50.2093 -            if (c_brack)
 50.2094 -                sprintf(cbrack_err, "    Line %ld - Mismatched curly brackets?\n", linecnt);
 50.2095 -            if (c_unders % 2)
 50.2096 -                sprintf(unders_err, "    Line %ld - Mismatched underscores?\n", linecnt);
 50.2097 -            quot = s_brack = c_brack = r_brack = c_unders =
 50.2098 -                open_single_quote = close_single_quote = 0;
 50.2099 -            isnewpara = 1;     /* let the next iteration know that it's starting a new para */
 50.2100 -            }
 50.2101 -
 50.2102 -        /* V.21 _ALSO_ at end of paragraph, check for omitted punctuation. */
 50.2103 -        /*      by working back through prevline. DW.                      */
 50.2104 -        /* Hmmm. Need to check this only for "normal" paras.               */
 50.2105 -        /* So what is a "normal" para? ouch!                               */
 50.2106 -        /* Not normal if one-liner (chapter headings, etc.)                */
 50.2107 -        /* Not normal if doesn't contain at least one locase letter        */
 50.2108 -        /* Not normal if starts with space                                 */
 50.2109 -
 50.2110 -        /* 0.99 tighten up on para end checks. Disallow comma and */
 50.2111 -        /* semi-colon. Check for legit para end before quotes.    */
 50.2112 -        if (isemptyline) {          /* end of para */
 50.2113 -            for (s = prevline, i = 0; *s && !i; s++)
 50.2114 -                if (gcisletter(*s))
 50.2115 -                    i = 1;    /* use i to indicate the presence of a letter on the line */
 50.2116 -            /* This next "if" is a problem.                                             */
 50.2117 -            /* If I say "start_para_line <= linecnt - 1", that includes one-line        */
 50.2118 -            /* "paragraphs" like chapter heads. Lotsa false positives.                  */
 50.2119 -            /* If I say "start_para_line < linecnt - 1" it doesn't, but then it         */
 50.2120 -            /* misses genuine one-line paragraphs.                                      */
 50.2121 -            /* So what do I do? */
 50.2122 -            if (i
 50.2123 -                && lastblen > 2
 50.2124 -                && start_para_line < linecnt - 1
 50.2125 -                && *prevline > CHAR_SPACE
 50.2126 -                ) {
 50.2127 -                for (i = strlen(prevline)-1; (prevline[i] == CHAR_DQUOTE || prevline[i] == CHAR_SQUOTE) && prevline[i] > CHAR_SPACE && i > 0; i--);
 50.2128 -                for (  ; i > 0; i--) {
 50.2129 -                    if (gcisalpha(prevline[i])) {
 50.2130 -                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
 50.2131 -                        if (!pswit[OVERVIEW_SWITCH])
 50.2132 -                            printf("    Line %ld column %d - No punctuation at para end?\n", linecnt-1, strlen(prevline));
 50.2133 -                        else
 50.2134 -                            cnt_punct++;
 50.2135 -                        break;
 50.2136 -                        }
 50.2137 -                    if (strchr("-.:!([{?}])", prevline[i]))
 50.2138 -                        break;
 50.2139 -                    }
 50.2140 -                }
 50.2141 -            }
 50.2142 -        strcpy(prevline, aline);
 50.2143 -    }
 50.2144 -    fclose (infile);
 50.2145 -    if (!pswit[OVERVIEW_SWITCH])
 50.2146 -        for (i = 0; i < MAX_QWORD; i++)
 50.2147 -            if (dupcnt[i])
 50.2148 -                printf("\nNote: Queried word %s was duplicated %d time%s\n", qword[i], dupcnt[i], "s");
 50.2149 -}
 50.2150 -
 50.2151 -
 50.2152 -
 50.2153 -/* flgets - get one line from the input stream, checking for   */
 50.2154 -/* the existence of exactly one CR/LF line-end per line.       */
 50.2155 -/* Returns a pointer to the line.                              */
 50.2156 -
 50.2157 -char *flgets(char *theline, int maxlen, FILE *thefile, long lcnt)
 50.2158 -{
 50.2159 -    char c;
 50.2160 -    int len, isCR, cint;
 50.2161 -
 50.2162 -    *theline = 0;
 50.2163 -    len = isCR = 0;
 50.2164 -    c = cint = fgetc(thefile);
 50.2165 -    do {
 50.2166 -        if (cint == EOF)
 50.2167 -            return (NULL);
 50.2168 -        if (c == 10)  /* either way, it's end of line */
 50.2169 -            if (isCR)
 50.2170 -                break;
 50.2171 -            else {   /* Error - a LF without a preceding CR */
 50.2172 -                if (pswit[LINE_END_SWITCH]) {
 50.2173 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
 50.2174 -                    if (!pswit[OVERVIEW_SWITCH])
 50.2175 -                        printf("    Line %ld - No CR?\n", lcnt);
 50.2176 -                    else
 50.2177 -                        cnt_lineend++;
 50.2178 -                    }
 50.2179 -                break;
 50.2180 -                }
 50.2181 -        if (c == 13) {
 50.2182 -            if (isCR) { /* Error - two successive CRs */
 50.2183 -                if (pswit[LINE_END_SWITCH]) {
 50.2184 -                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
 50.2185 -                    if (!pswit[OVERVIEW_SWITCH])
 50.2186 -                        printf("    Line %ld - Two successive CRs?\n", lcnt);
 50.2187 -                    else
 50.2188 -                        cnt_lineend++;
 50.2189 -                    }
 50.2190 -                }
 50.2191 -            isCR = 1;
 50.2192 -            }
 50.2193 -        else {
 50.2194 -            if (pswit[LINE_END_SWITCH] && isCR) {
 50.2195 -                if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
 50.2196 -                if (!pswit[OVERVIEW_SWITCH])
 50.2197 -                    printf("    Line %ld column %d - CR without LF?\n", lcnt, len+1);
 50.2198 -                else
 50.2199 -                    cnt_lineend++;
 50.2200 -                }
 50.2201 -             theline[len] = c;
 50.2202 -             len++;
 50.2203 -             theline[len] = 0;
 50.2204 -             isCR = 0;
 50.2205 -             }
 50.2206 -        c = cint = fgetc(thefile);
 50.2207 -    } while(len < maxlen);
 50.2208 -    if (pswit[MARKUP_SWITCH])  
 50.2209 -        postprocess_for_HTML(theline);
 50.2210 -    if (pswit[DP_SWITCH])  
 50.2211 -        postprocess_for_DP(theline);
 50.2212 -    return(theline);
 50.2213 -}
 50.2214 -
 50.2215 -
 50.2216 -
 50.2217 -
 50.2218 -/* mixdigit - takes a "word" as a parameter, and checks whether it   */
 50.2219 -/* contains a mixture of alpha and digits. Generally, this is an     */
 50.2220 -/* error, but may not be for cases like 4th or L5 12s. 3d.           */
 50.2221 -/* Returns 0 if no error found, 1 if error.                          */
 50.2222 -
 50.2223 -int mixdigit(char *checkword)   /* check for digits like 1 or 0 in words */
 50.2224 -{
 50.2225 -    int wehaveadigit, wehavealetter, firstdigits, query, wl;
 50.2226 -    char *s;
 50.2227 -
 50.2228 -
 50.2229 -    wehaveadigit = wehavealetter = query = 0;
 50.2230 -    for (s = checkword; *s; s++)
 50.2231 -        if (gcisalpha(*s))
 50.2232 -            wehavealetter = 1;
 50.2233 -        else
 50.2234 -            if (gcisdigit(*s))
 50.2235 -                wehaveadigit = 1;
 50.2236 -    if (wehaveadigit && wehavealetter) {         /* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
 50.2237 -        query = 1;
 50.2238 -        wl = strlen(checkword);
 50.2239 -        for (firstdigits = 0; gcisdigit(checkword[firstdigits]); firstdigits++)
 50.2240 -            ;
 50.2241 -        /* digits, ending in st, rd, nd, th of either case */
 50.2242 -        /* 0.99 donovan points out an error below. Turns out */
 50.2243 -        /*      I was using matchword like strcmp when the   */
 50.2244 -        /*      return values are different! Duh.            */
 50.2245 -        if (firstdigits + 2 == wl &&
 50.2246 -              (matchword(checkword + wl - 2, "st")
 50.2247 -            || matchword(checkword + wl - 2, "rd")
 50.2248 -            || matchword(checkword + wl - 2, "nd")
 50.2249 -            || matchword(checkword + wl - 2, "th"))
 50.2250 -            )
 50.2251 -                query = 0;
 50.2252 -        if (firstdigits + 3 == wl &&
 50.2253 -              (matchword(checkword + wl - 3, "sts")
 50.2254 -            || matchword(checkword + wl - 3, "rds")
 50.2255 -            || matchword(checkword + wl - 3, "nds")
 50.2256 -            || matchword(checkword + wl - 3, "ths"))
 50.2257 -            )
 50.2258 -                query = 0;
 50.2259 -        if (firstdigits + 3 == wl &&
 50.2260 -              (matchword(checkword + wl - 4, "stly")
 50.2261 -            || matchword(checkword + wl - 4, "rdly")
 50.2262 -            || matchword(checkword + wl - 4, "ndly")
 50.2263 -            || matchword(checkword + wl - 4, "thly"))
 50.2264 -            )
 50.2265 -                query = 0;
 50.2266 -
 50.2267 -        /* digits, ending in l, L, s or d */
 50.2268 -        if (firstdigits + 1 == wl &&
 50.2269 -            (checkword[wl-1] == 'l'
 50.2270 -            || checkword[wl-1] == 'L'
 50.2271 -            || checkword[wl-1] == 's'
 50.2272 -            || checkword[wl-1] == 'd'))
 50.2273 -                query = 0;
 50.2274 -        /* L at the start of a number, representing Britsh pounds, like L500  */
 50.2275 -        /* This is cute. We know the current word is mixeddigit. If the first */
 50.2276 -        /* letter is L, there must be at least one digit following. If both   */
 50.2277 -        /* digits and letters follow, we have a genuine error, else we have a */
 50.2278 -        /* capital L followed by digits, and we accept that as a non-error.   */
 50.2279 -        if (checkword[0] == 'L')
 50.2280 -            if (!mixdigit(checkword+1))
 50.2281 -                query = 0;
 50.2282 -        }
 50.2283 -    return (query);
 50.2284 -}
 50.2285 -
 50.2286 -
 50.2287 -
 50.2288 -
 50.2289 -/* getaword - extracts the first/next "word" from the line, and puts */
 50.2290 -/* it into "thisword". A word is defined as one English word unit    */
 50.2291 -/* -- or at least that's what I'm trying for.                        */
 50.2292 -/* Returns a pointer to the position in the line where we will start */
 50.2293 -/* looking for the next word.                                        */
 50.2294 -
 50.2295 -char *getaword(char *fromline, char *thisword)
 50.2296 -{
 50.2297 -    int i, wordlen;
 50.2298 -    char *s;
 50.2299 -
 50.2300 -    wordlen = 0;
 50.2301 -    for ( ; !gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline ; fromline++ );
 50.2302 -
 50.2303 -    /* V .20                                                                   */
 50.2304 -    /* add a look-ahead to handle exceptions for numbers like 1,000 and 1.35.  */
 50.2305 -    /* Especially yucky is the case of L1,000                                  */
 50.2306 -    /* I hate this, and I see other ways, but I don't see that any is _better_.*/
 50.2307 -    /* This section looks for a pattern of characters including a digit        */
 50.2308 -    /* followed by a comma or period followed by one or more digits.           */
 50.2309 -    /* If found, it returns this whole pattern as a word; otherwise we discard */
 50.2310 -    /* the results and resume our normal programming.                          */
 50.2311 -    s = fromline;
 50.2312 -    for (  ; (gcisdigit(*s) || gcisalpha(*s) || *s == ',' || *s == '.') && wordlen < MAXWORDLEN ; s++ ) {
 50.2313 -        thisword[wordlen] = *s;
 50.2314 -        wordlen++;
 50.2315 -        }
 50.2316 -    thisword[wordlen] = 0;
 50.2317 -    for (i = 1; i < wordlen -1; i++) {
 50.2318 -        if (thisword[i] == '.' || thisword[i] == ',') {
 50.2319 -            if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1])) {   /* we have one of the damned things */
 50.2320 -                fromline = s;
 50.2321 -                return(fromline);
 50.2322 -                }
 50.2323 -            }
 50.2324 -        }
 50.2325 -
 50.2326 -    /* we didn't find a punctuated number - do the regular getword thing */
 50.2327 -    wordlen = 0;
 50.2328 -    for (  ; (gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline == '\'') && wordlen < MAXWORDLEN ; fromline++ ) {
 50.2329 -        thisword[wordlen] = *fromline;
 50.2330 -        wordlen++;
 50.2331 -        }
 50.2332 -    thisword[wordlen] = 0;
 50.2333 -    return(fromline);
 50.2334 -}
 50.2335 -
 50.2336 -
 50.2337 -
 50.2338 -
 50.2339 -
 50.2340 -/* matchword - just a case-insensitive string matcher    */
 50.2341 -/* yes, I know this is not efficient. I'll worry about   */
 50.2342 -/* that when I have a clear idea where I'm going with it.*/
 50.2343 -
 50.2344 -int matchword(char *checkfor, char *thisword)
 50.2345 -{
 50.2346 -    unsigned int ismatch, i;
 50.2347 -
 50.2348 -    if (strlen(checkfor) != strlen(thisword)) return(0);
 50.2349 -
 50.2350 -    ismatch = 1;     /* assume a match until we find a difference */
 50.2351 -    for (i = 0; i <strlen(checkfor); i++)
 50.2352 -        if (toupper(checkfor[i]) != toupper(thisword[i]))
 50.2353 -            ismatch = 0;
 50.2354 -    return (ismatch);
 50.2355 -}
 50.2356 -
 50.2357 -
 50.2358 -
 50.2359 -
 50.2360 -
 50.2361 -/* lowerit - lowercase the line. Yes, strlwr does the same job,  */
 50.2362 -/* but not on all platforms, and I'm a bit paranoid about what   */
 50.2363 -/* some implementations of tolower might do to hi-bit characters,*/
 50.2364 -/* which shouldn't matter, but better safe than sorry.           */
 50.2365 -
 50.2366 -void lowerit(char *theline)
 50.2367 -{
 50.2368 -    for ( ; *theline; theline++)
 50.2369 -        if (*theline >='A' && *theline <='Z')
 50.2370 -            *theline += 32;
 50.2371 -}
 50.2372 -
 50.2373 -
 50.2374 -/* Is this word a Roman Numeral?                                    */
 50.2375 -/* v 0.99 improved to be better. It still doesn't actually          */
 50.2376 -/* validate that the number is a valid Roman Numeral -- for example */
 50.2377 -/* it will pass MXXXXXXXXXX as a valid Roman Numeral, but that's not*/
 50.2378 -/* what we're here to do. If it passes this, it LOOKS like a Roman  */
 50.2379 -/* numeral. Anyway, the actual Romans were pretty tolerant of bad   */
 50.2380 -/* arithmetic, or expressions thereof, except when it came to taxes.*/
 50.2381 -/* Allow any number of M, an optional D, an optional CM or CD,      */
 50.2382 -/* any number of optional Cs, an optional XL or an optional XC, an  */
 50.2383 -/* optional IX or IV, an optional V and any number of optional Is.  */
 50.2384 -/* Good enough for jazz chords.                                     */
 50.2385 -
 50.2386 -int isroman(char *t)
 50.2387 -{
 50.2388 -    char *s;
 50.2389 -
 50.2390 -    if (!t || !*t) return (0);
 50.2391 -
 50.2392 -    s = t;
 50.2393 -
 50.2394 -    while (*t == 'm' && *t ) t++;
 50.2395 -    if (*t == 'd') t++;
 50.2396 -    if (*t == 'c' && *(t+1) == 'm') t+=2;
 50.2397 -    if (*t == 'c' && *(t+1) == 'd') t+=2;
 50.2398 -    while (*t == 'c' && *t) t++;
 50.2399 -    if (*t == 'x' && *(t+1) == 'l') t+=2;
 50.2400 -    if (*t == 'x' && *(t+1) == 'c') t+=2;
 50.2401 -    if (*t == 'l') t++;
 50.2402 -    while (*t == 'x' && *t) t++;
 50.2403 -    if (*t == 'i' && *(t+1) == 'x') t+=2;
 50.2404 -    if (*t == 'i' && *(t+1) == 'v') t+=2;
 50.2405 -    if (*t == 'v') t++;
 50.2406 -    while (*t == 'i' && *t) t++;
 50.2407 -    if (!*t) return (1);
 50.2408 -
 50.2409 -    return(0);
 50.2410 -}
 50.2411 -
 50.2412 -
 50.2413 -
 50.2414 -
 50.2415 -/* gcisalpha is a special version that is somewhat lenient on 8-bit texts.     */
 50.2416 -/* If we use the standard isalpha() function, 8-bit accented characters break  */
 50.2417 -/* words, so that tete with accented characters appears to be two words, "t"   */
 50.2418 -/* and "t", with 8-bit characters between them. This causes over-reporting of  */
 50.2419 -/* errors. gcisalpha() recognizes accented letters from the CP1252 (Windows)   */
 50.2420 -/* and ISO-8859-1 character sets, which are the most common PG 8-bit types.    */
 50.2421 -
 50.2422 -int gcisalpha(unsigned char c)
 50.2423 -{
 50.2424 -    if (c >='a' && c <='z') return(1);
 50.2425 -    if (c >='A' && c <='Z') return(1);
 50.2426 -    if (c < 140) return(0);
 50.2427 -    if (c >=192 && c != 208 && c != 215 && c != 222 && c != 240 && c != 247 && c != 254) return(1);
 50.2428 -    if (c == 140 || c == 142 || c == 156 || c == 158 || c == 159) return (1);
 50.2429 -    return(0);
 50.2430 -}
 50.2431 -
 50.2432 -/* gcisdigit is a special version that doesn't get confused in 8-bit texts.    */
 50.2433 -int gcisdigit(unsigned char c)
 50.2434 -{   
 50.2435 -    if (c >= '0' && c <='9') return(1);
 50.2436 -    return(0);
 50.2437 -}
 50.2438 -
 50.2439 -/* gcisletter is a special version that doesn't get confused in 8-bit texts.    */
 50.2440 -/* Yeah, we're ISO-8891-1-specific. So sue me.                                  */
 50.2441 -int gcisletter(unsigned char c)
 50.2442 -{   
 50.2443 -    if ((c >= 'A' && c <='Z') || (c >= 'a' && c <='z') || c >= 192) return(1);
 50.2444 -    return(0);
 50.2445 -}
 50.2446 -
 50.2447 -
 50.2448 -
 50.2449 -
 50.2450 -/* gcstrchr wraps strchr to return NULL if the character being searched for is zero */
 50.2451 -
 50.2452 -char *gcstrchr(char *s, char c)
 50.2453 -{
 50.2454 -    if (c == 0) return(NULL);
 50.2455 -    return(strchr(s,c));
 50.2456 -}
 50.2457 -
 50.2458 -/* postprocess_for_DP is derived from postprocess_for_HTML          */
 50.2459 -/* It is invoked with the -d switch from flgets().                  */
 50.2460 -/* It simply "removes" from the line a hard-coded set of common     */
 50.2461 -/* DP-specific tags, so that the line passed to the main routine has*/
 50.2462 -/* been pre-cleaned of DP markup.                                   */
 50.2463 -
 50.2464 -void postprocess_for_DP(char *theline)
 50.2465 -{
 50.2466 -
 50.2467 -    char *s, *t;
 50.2468 -    int i;
 50.2469 -
 50.2470 -    if (!*theline) 
 50.2471 -        return;
 50.2472 -
 50.2473 -    for (i = 0; *DPmarkup[i]; i++) {
 50.2474 -        s = strstr(theline, DPmarkup[i]);
 50.2475 -        while (s) {
 50.2476 -            t = s + strlen(DPmarkup[i]);
 50.2477 -            while (*t) {
 50.2478 -                *s = *t;
 50.2479 -                t++; s++;
 50.2480 -                }
 50.2481 -            *s = 0;
 50.2482 -            s = strstr(theline, DPmarkup[i]);
 50.2483 -            }
 50.2484 -        }
 50.2485 -
 50.2486 -}
 50.2487 -
 50.2488 -
 50.2489 -/* postprocess_for_HTML is, at the moment (0.97), a very nasty      */
 50.2490 -/* short-term fix for Charlz. Nasty, nasty, nasty.                  */
 50.2491 -/* It is invoked with the -m switch from flgets().                  */
 50.2492 -/* It simply "removes" from the line a hard-coded set of common     */
 50.2493 -/* HTML tags and "replaces" a hard-coded set of common HTML         */
 50.2494 -/* entities, so that the line passed to the main routine has        */
 50.2495 -/* been pre-cleaned of HTML. This is _so_ not the right way to      */
 50.2496 -/* deal with HTML, but what Charlz needs now is not HTML handling   */
 50.2497 -/* proper: just ignoring <i> tags and some others.                  */
 50.2498 -/* To be revisited in future releases!                              */
 50.2499 -
 50.2500 -void postprocess_for_HTML(char *theline)
 50.2501 -{
 50.2502 -
 50.2503 -    if (strstr(theline, "<") && strstr(theline, ">"))
 50.2504 -        while (losemarkup(theline))
 50.2505 -            ;
 50.2506 -    while (loseentities(theline))
 50.2507 -        ;
 50.2508 -}
 50.2509 -
 50.2510 -char *losemarkup(char *theline)
 50.2511 -{
 50.2512 -    char *s, *t;
 50.2513 -    int i;
 50.2514 -
 50.2515 -    if (!*theline) 
 50.2516 -        return(NULL);
 50.2517 -
 50.2518 -    s = strstr(theline, "<");
 50.2519 -    t = strstr(theline, ">");
 50.2520 -    if (!s || !t) return(NULL);
 50.2521 -    for (i = 0; *markup[i]; i++)
 50.2522 -        if (!tagcomp(s+1, markup[i])) {
 50.2523 -            if (!*(t+1)) {
 50.2524 -                *s = 0;
 50.2525 -                return(s);
 50.2526 -                }
 50.2527 -            else
 50.2528 -                if (t > s) {
 50.2529 -                    strcpy(s, t+1);
 50.2530 -                    return(s);
 50.2531 -                    }
 50.2532 -        }
 50.2533 -    /* it's an unrecognized <xxx> */
 50.2534 -    return(NULL);
 50.2535 -}
 50.2536 -
 50.2537 -char *loseentities(char *theline)
 50.2538 -{
 50.2539 -    int i;
 50.2540 -    char *s, *t;
 50.2541 -
 50.2542 -    if (!*theline) 
 50.2543 -        return(NULL);
 50.2544 -
 50.2545 -    for (i = 0; *entities[i].htmlent; i++) {
 50.2546 -        s = strstr(theline, entities[i].htmlent);
 50.2547 -        if (s) {
 50.2548 -            t = malloc((size_t)strlen(s));
 50.2549 -            if (!t) return(NULL);
 50.2550 -            strcpy(t, s + strlen(entities[i].htmlent));
 50.2551 -            strcpy(s, entities[i].textent);
 50.2552 -            strcat(s, t);
 50.2553 -            free(t);
 50.2554 -            return(theline);
 50.2555 -            }
 50.2556 -        }
 50.2557 -
 50.2558 -    /* V0.97 Duh. Forgot to check the htmlnum member */
 50.2559 -    for (i = 0; *entities[i].htmlnum; i++) {
 50.2560 -        s = strstr(theline, entities[i].htmlnum);
 50.2561 -        if (s) {
 50.2562 -            t = malloc((size_t)strlen(s));
 50.2563 -            if (!t) return(NULL);
 50.2564 -            strcpy(t, s + strlen(entities[i].htmlnum));
 50.2565 -            strcpy(s, entities[i].textent);
 50.2566 -            strcat(s, t);
 50.2567 -            free(t);
 50.2568 -            return(theline);
 50.2569 -            }
 50.2570 -        }
 50.2571 -    return(NULL);
 50.2572 -}
 50.2573 -
 50.2574 -
 50.2575 -int tagcomp(char *strin, char *basetag)
 50.2576 -{
 50.2577 -    char *s, *t;
 50.2578 -
 50.2579 -    s = basetag;
 50.2580 -    t  = strin;
 50.2581 -    if (*t == '/') t++; /* ignore a slash */
 50.2582 -    while (*s && *t) {
 50.2583 -        if (tolower(*s) != tolower(*t)) return(1);
 50.2584 -        s++; t++;
 50.2585 -        }
 50.2586 -    /* OK, we have < followed by a valid tag start  */
 50.2587 -    /* should I do something about length?          */
 50.2588 -    /* this is messy. The length of an <i> tag is   */
 50.2589 -    /* limited, but a <table> could go on for miles */
 50.2590 -    /* so I'd have to parse the tags . . . ugh.     */
 50.2591 -    /* It isn't what Charlz needs now, so mark it   */
 50.2592 -    /* as 'pending'.                                */
 50.2593 -    return(0);
 50.2594 -}
 50.2595 -
 50.2596 -void proghelp()                  /* explain program usage here */
 50.2597 -{
 50.2598 -    fputs("V. 0.991. Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
 50.2599 -    fputs("Gutcheck comes wih ABSOLUTELY NO WARRANTY. For details, read the file COPYING.\n", stderr);
 50.2600 -    fputs("This is Free Software; you may redistribute it under certain conditions (GPL);\n", stderr);
 50.2601 -    fputs("read the file COPYING for details.\n\n", stderr);
 50.2602 -    fputs("Usage is: gutcheck [-setpxloyhud] filename\n",stderr);
 50.2603 -    fputs("  where -s checks single quotes, -e suppresses echoing lines, -t checks typos\n",stderr);
 50.2604 -    fputs("  -x (paranoid) switches OFF -t and extra checks, -l turns OFF line-end checks\n",stderr);
 50.2605 -    fputs("  -o just displays overview without detail, -h echoes header fields\n",stderr);
 50.2606 -    fputs("  -v (verbose) unsuppresses duplicate reporting, -m suppresses markup\n",stderr);
 50.2607 -    fputs("  -d ignores DP-specific markup,\n",stderr);
 50.2608 -    fputs("  -u uses a file gutcheck.typ to query user-defined possible typos\n",stderr);
 50.2609 -    fputs("Sample usage: gutcheck warpeace.txt \n",stderr);
 50.2610 -    fputs("\n",stderr);
 50.2611 -    fputs("Gutcheck looks for errors in Project Gutenberg(TM) etexts.\n", stderr);
 50.2612 -    fputs("Gutcheck queries anything it thinks shouldn't be in a PG text; non-ASCII\n",stderr);
 50.2613 -    fputs("characters like accented letters, lines longer than 75 or shorter than 55,\n",stderr);
 50.2614 -    fputs("unbalanced quotes or brackets, a variety of badly formatted punctuation, \n",stderr);
 50.2615 -    fputs("HTML tags, some likely typos. It is NOT a substitute for human judgement.\n",stderr);
 50.2616 -    fputs("\n",stderr);
 50.2617 -}
 50.2618 -
 50.2619 -
 50.2620 -
 50.2621 -/*********************************************************************
 50.2622 -  Revision History:
 50.2623 -
 50.2624 -  04/22/01 Cleaned up some stuff and released .10
 50.2625 -
 50.2626 -           ---------------
 50.2627 -
 50.2628 -  05/09/01 Added the typo list, added two extra cases of he/be error,
 50.2629 -           added -p switch, OPEN_SINGLE QUOTE char as .11
 50.2630 -
 50.2631 -           ---------------
 50.2632 -
 50.2633 -  05/20/01 Increased the typo list,
 50.2634 -           added paranoid mode,
 50.2635 -           ANSIfied the code and added some casts
 50.2636 -              so the compiler wouldn't keep asking if I knew what I was doing,
 50.2637 -           fixed bug in l.s.d. condition (thanks, Dave!),
 50.2638 -           standardized spacing when echoing,
 50.2639 -           added letter-combo checking code to typo section,
 50.2640 -           added more h/b words to typo array.
 50.2641 -           Not too sure about putting letter combos outside of the TYPO conditions -
 50.2642 -           someone is sure to have a book about the tbaka tribe, or something. Anyway, let's see.
 50.2643 -           Released as .12
 50.2644 -
 50.2645 -           ---------------
 50.2646 -
 50.2647 -  06/01/01 Removed duplicate reporting of Tildes, asterisks, etc.
 50.2648 -  06/10/01 Added flgets routine to help with platform-independent
 50.2649 -           detection of invalid line-ends. All PG text files should
 50.2650 -           have CR/LF (13/10) at end of line, regardless of system.
 50.2651 -           Gutcheck now validates this by default. (Thanks, Charles!)
 50.2652 -           Released as .13
 50.2653 -
 50.2654 -           ---------------
 50.2655 -
 50.2656 -  06/11/01 Added parenthesis match checking. (c_brack, cbrack_err etc.)
 50.2657 -           Released as .14
 50.2658 -
 50.2659 -           ---------------
 50.2660 -
 50.2661 -  06/23/01 Fixed: 'No',he said. not being flagged.
 50.2662 -
 50.2663 -           Improved: better single-quotes checking:
 50.2664 -
 50.2665 -           Ignore singlequotes surrounded by alpha, like didn't. (was OK)
 50.2666 -
 50.2667 -           If a singlequote is at the END of a word AND the word ends in "s":
 50.2668 -                  The dogs' tails wagged.
 50.2669 -           it's probably an apostrophe, but less commonly may be a closequote:
 50.2670 -                  "These 'pack dogs' of yours look more like wolves."
 50.2671 -
 50.2672 -           If it's got punctuation before it and is followed by a space
 50.2673 -           or punctuation:
 50.2674 -              . . . was a problem,' he said
 50.2675 -              . . . was a problem,'"
 50.2676 -           it is probably (certainly?) a closequote.
 50.2677 -
 50.2678 -           If it's at start of paragraph, it's probably an openquote.
 50.2679 -              (but watch dialect)
 50.2680 -
 50.2681 -           Words with ' at beginning and end are probably quoted:
 50.2682 -               "You have the word 'chivalry' frequently on your lips."
 50.2683 -               (Not specifically implemented)
 50.2684 -           V.18 I'm glad I didn't implement this, 'cos it jest ain't so
 50.2685 -           where the convention is to punctuate outside the quotes.
 50.2686 -               'Come', he said, 'and join the party'.
 50.2687 -
 50.2688 -           If it is followed by an alpha, and especially a capital:
 50.2689 -              'Hello,' called he.
 50.2690 -           it is either an openquote or dialect.
 50.2691 -
 50.2692 -           Dialect breaks ALL the rules:
 50.2693 -                  A man's a man for a' that.
 50.2694 -                  "Aye, but 'tis all in the pas' now."
 50.2695 -                  "'Tis often the way," he said.
 50.2696 -                  'Ave a drink on me.
 50.2697 -
 50.2698 -           This version looks to be an improvement, and produces
 50.2699 -           fewer false positives, but is still not perfect. The
 50.2700 -           'pack dogs' case still fools it, and dialect is still
 50.2701 -           a problem. Oh, well, it's an improvement, and I have
 50.2702 -           a weighted structure in place for refining guesses at
 50.2703 -           closequotes. Maybe next time, I'll add a bit of logic
 50.2704 -           where if there is an open quote and one that was guessed
 50.2705 -           to be a possessive apostrophe after s, I'll re-guess it
 50.2706 -           to be a closequote. Let's see how this one flies, first.
 50.2707 -
 50.2708 -           (Afterview: it's still crap. Needs much work, and a deeper insight.)
 50.2709 -
 50.2710 -           Released as .15
 50.2711 -
 50.2712 -           TODO: More he/be checks. Can't be perfect - counterexamples:
 50.2713 -              I gave my son good advice: be married regardless of the world's opinion.
 50.2714 -              I gave my son good advice: he married regardless of the world's opinion.
 50.2715 -
 50.2716 -              If by "primitive" be meant "crude", we can understand the sentence.
 50.2717 -              If by "primitive" he meant "crude", we can understand the sentence.
 50.2718 -
 50.2719 -              No matter what be said, I must go on.
 50.2720 -              No matter what he said, I must go on.
 50.2721 -
 50.2722 -              No value, however great, can be set upon them.
 50.2723 -              No value, however great, can he set upon them.
 50.2724 -
 50.2725 -              Real-Life one from a DP International Weekly Miscellany:
 50.2726 -                He wandered through the forest without fear, sleeping
 50.2727 -                much, for in sleep be had companionship--the Great
 50.2728 -                Spirit teaching him what he should know in dreams.
 50.2729 -                That one found by jeebies, and it turned out to be "he".
 50.2730 -
 50.2731 -
 50.2732 -           ---------------
 50.2733 -
 50.2734 -  07/01/01 Added -O option.
 50.2735 -           Improved singlequotes by reporting mismatched single quotes
 50.2736 -           only if an open_single_quotes was found.
 50.2737 -
 50.2738 -           Released as .16
 50.2739 -
 50.2740 -           ---------------
 50.2741 -
 50.2742 -  08/27/01 Added -Y switch for Robert Rowe to allow his app to
 50.2743 -           catch the error output.
 50.2744 -
 50.2745 -           Released as .17
 50.2746 -
 50.2747 -           ---------------
 50.2748 -
 50.2749 -  09/08/01 Added checking Capitals at start of paragraph, but not
 50.2750 -           checking them at start of sentence.
 50.2751 -
 50.2752 -           TODO: Parse sentences out so can check reliably for start of
 50.2753 -                 sentence. Need a whole different approach for that.
 50.2754 -                 (Can't just rely on periods, since they are also
 50.2755 -                 used for abbreviations, etc.)
 50.2756 -
 50.2757 -           Added checking for all vowels or all consonants in a word.
 50.2758 -
 50.2759 -           While I was in, I added "ii" checking and "tl" at start of word.
 50.2760 -
 50.2761 -           Added echoing of first line of paragraph when reporting
 50.2762 -           mismatched quoted or brackets (thanks to David Widger for the
 50.2763 -           suggestion)
 50.2764 -
 50.2765 -           Not querying L at start of a number (used for British pounds).
 50.2766 -
 50.2767 -           The spelling changes are sort of half-done but released anyway
 50.2768 -           Skipped .18 because I had given out a couple of test versions
 50.2769 -           with that number.
 50.2770 -
 50.2771 -  09/25/01 Released as .19
 50.2772 -
 50.2773 -           ---------------
 50.2774 -
 50.2775 -           TODO:
 50.2776 -           Use the logic from my new version of safewrap to stop querying
 50.2777 -             short lines like poems and TOCs.
 50.2778 -           Ignore non-standard ellipses like .  .  . or ...
 50.2779 -
 50.2780 -
 50.2781 -           ---------------
 50.2782 -  10/01/01 Made any line over 80 a VERY long line (was 85).
 50.2783 -           Recognized openquotes on indented paragraphs as continuations
 50.2784 -               of the same speech.
 50.2785 -           Added "cf" to the okword list (how did I forget _that_?) and a few others.
 50.2786 -           Moved abbrev to okword and made it more general.
 50.2787 -           Removed requirement that PG_space_emdash be greater than
 50.2788 -               ten before turning off warnings about spaced dashes.
 50.2789 -           Added period to list of characters that might constitute a separator line.
 50.2790 -           Now checking for double punctuation (Thanks, David!)
 50.2791 -           Now if two spaced em-dashes on a line, reports both. (DW)
 50.2792 -           Bug: Wasn't catching spaced punctuation at line-end since I
 50.2793 -               added flgets in version .13 - fixed.
 50.2794 -           Bug: Wasn't catching spaced singlequotes - fixed
 50.2795 -           Now reads punctuated numbers like 1,000 as a single word.
 50.2796 -               (Used to give "standalone 1" type  queries)
 50.2797 -           Changed paranoid mode - not including s and p options. -ex is now quite usable.
 50.2798 -           Bug: was calling `"For it is perfectly impossible,"    Unspaced Quotes - fixed
 50.2799 -           Bug: Sometimes gave _next_ line number for queried word at end of line - fixed
 50.2800 -
 50.2801 -  10/22/01 Released as .20
 50.2802 -
 50.2803 -           ---------------
 50.2804 -
 50.2805 -           Added count of lines with spaces at end. (cnt_spacend) (Thanks, Brett!)
 50.2806 -           Reduced the number of hi-bit letters needed to stop reporting them
 50.2807 -               from 1/20 to 1/100 or 200 in total.
 50.2808 -           Added PG footer check.
 50.2809 -           Added the -h switch.
 50.2810 -           Fixed platform-specific CHAR_EOL checking for isemptyline - changed to 13 and 10
 50.2811 -           Not reporting ".," when there are many of them, such as a book with many references to "Vol 1., p. 23"
 50.2812 -           Added unspaced brackets check when surrounded by alpha.
 50.2813 -           Removed all typo reporting unless the typo switch is on.
 50.2814 -           Added gcisalpha to ease over-reporting of 8-bit queries.
 50.2815 -           ECHO_SWITCH is now ON by default!
 50.2816 -           PARANOID_SWITCH is now ON by default!
 50.2817 -           Checking for ">From" placed there by e-mail MTA (Thanks Andrew & Greg)
 50.2818 -           Checking for standalone lowercase "l"
 50.2819 -           Checking for standalone lowercase "s"
 50.2820 -           Considering "is be" and "be is" "be was" "was be" as he/be errors
 50.2821 -           Looking at punct at end of para
 50.2822 -
 50.2823 -  01/20/02 Released as .21
 50.2824 -
 50.2825 -           ---------------
 50.2826 -
 50.2827 -           Added VERBOSE_SWITCH to make it list everything. (George Davis)
 50.2828 -
 50.2829 -           ---------------
 50.2830 -
 50.2831 -  02/17/02 Added cint in flgets to try fix an EOF failure on a compiler I don't have.
 50.2832 -           after which
 50.2833 -           This line caused a coredump on Solaris - fixed.
 50.2834 -                Da sagte die Figur: " Das ist alles gar schoen, und man mag die Puppe
 50.2835 -  03/09/02 Changed header recognition for another header change
 50.2836 -           Called it .24
 50.2837 -  03/29/02 Added qword[][] so I can suppress massive overreporting
 50.2838 -           of queried "words" like "FN", "Wm.", "th'", people's 
 50.2839 -           initials, chemical formulae and suchlike in some texts.
 50.2840 -           Called it .25
 50.2841 -  04/07/02 The qword summary reports at end shouldn't show in OVERVIEW mode. Fixed.
 50.2842 -           Added linecounts in overview mode.
 50.2843 -           Wow! gutcheck gutcheck.exe doesn't report a binary! :-) Need to tighten up. Done.
 50.2844 -           "m" is a not uncommon scanno for "in", but also appears in "a.m." - Can I get round that?
 50.2845 -  07/07/02 Added GPL.
 50.2846 -           Added checking for broken em-dash at line-end (enddash)
 50.2847 -           Released as 0.95
 50.2848 -  08/17/02 Fixed a bug that treated some hi-bit characters as spaces. Thanks, Carlo.
 50.2849 -           Released as 0.96
 50.2850 -  10/10/02 Suppressing some annoying multiple reports by default:
 50.2851 -           Standalone Ones, Asterisks, Square Brackets.
 50.2852 -              Digit 1 occurs often in many scientific texts.
 50.2853 -              Asterisk occurs often in multi-footnoted texts.
 50.2854 -              Mismatch Square Brackets occurs often in multi-para footnotes.
 50.2855 -           Added -m switch for Charlz. Horrible. Nasty. Kludgy. Evil.
 50.2856 -              . . . but it does more or less work for the main cases.
 50.2857 -           Removed uppercase within a word as a separate category so
 50.2858 -           that names like VanAllen get reported only once, like other
 50.2859 -           suspected typos.
 50.2860 -  11/24/02 Fixed - -m switch wasn't looking at htmlnum in
 50.2861 -           loseentities (Thanks, Brett!)
 50.2862 -           Fixed bug which occasionally gave false warning of
 50.2863 -           paragraph starting with lowercase.
 50.2864 -           Added underscore as character not to query around doublequotes.
 50.2865 -           Split the "Non-ASCII" message into "Non-ASCII" vs. "Non-ISO-8859"
 50.2866 -           . . . this is to help detect things like CP1252 characters.
 50.2867 -           Released as 0.97
 50.2868 -
 50.2869 -  12/01/02 Hacked a simplified version of the "Wrongspaced quotes" out of gutspell,
 50.2870 -           for doublequotes only. Replaces "Spaced quote", since it also covers that
 50.2871 -           case.
 50.2872 -           Added "warn_hyphen" to ease over-reporting of hyphens.
 50.2873 -
 50.2874 -  12/20/02 Added "extra period" checks.
 50.2875 -           Added single character line check
 50.2876 -           Added I" check - is usually an exclam
 50.2877 -           Released as 0.98
 50.2878 -
 50.2879 -  1/5/03   Eeek! Left in a lowerit(argv[0]) at the start before procfile()
 50.2880 -           from when I was looking at ways to identify markup. Refuses to
 50.2881 -           open files for *nix users with upcase in the filemanes. Removed.
 50.2882 -           Fixed quickly and released as 0.981
 50.2883 -
 50.2884 -  1/8/03   Added "arid" to the list of typos, slightly against my better
 50.2885 -           judgement, but the DP gang are all excited about it. :-)
 50.2886 -           Added a check for comma followed by capital letter, where
 50.2887 -           a period has OCRed into a comma. (DW). Not sure about this
 50.2888 -           either; we'll see.
 50.2889 -           Compiling for Win32 to allow longfilenames.
 50.2890 -
 50.2891 -  6/1/04   A messy test release for DW to include the "gutcheck.typ"
 50.2892 -           process. And the gutcheck.jee trials. Removed "arid" --
 50.2893 -           it can go in gutcheck.typ
 50.2894 -
 50.2895 -           Added checks for carats ^ and slants / but disabling slant
 50.2896 -           queries if more than 20 of them, because some people use them
 50.2897 -           for /italics/. Slants are commonly mistaken italic "I"s.
 50.2898 -
 50.2899 -           Later: removed gutcheck.jee -- wrote jeebies instead.
 50.2900 -
 50.2901 -Random TODO: 
 50.2902 -           Check brackets more closely, like quotes, so that it becomes
 50.2903 -           easy to find the error in long paragraphs full of brackets.
 50.2904 -
 50.2905 -
 50.2906 -  11/4/04  Assorted cleanup. Fixed case where text started with an
 50.2907 -           unbalanced paragraph.
 50.2908 -
 50.2909 -  1/2/05   Has it really been that long? Added "nocomma", "noperiod" check.
 50.2910 -           Bits and pieces: improved isroman(). Added isletter().
 50.2911 -           Other stuff I never noted before this.
 50.2912 -
 50.2913 -  7/3/05   Stuck in a quick start on DP-markup ignoring 
 50.2914 -           at BillFlis's suggestion.
 50.2915 -
 50.2916 -  1/23/06  Took out nocomma etc if typos are off. Why did I ever leave that in?
 50.2917 -           Don't count footer for dotcomma etc.
 50.2918 -
 50.2919 -
 50.2920 -1       I
 50.2921 -ail     all
 50.2922 -arc     are
 50.2923 -arid    and
 50.2924 -bad     had
 50.2925 -ball    hall
 50.2926 -band    hand
 50.2927 -bar     her
 50.2928 -bat     but
 50.2929 -be      he
 50.2930 -bead    head
 50.2931 -beads   heads
 50.2932 -bear    hear
 50.2933 -bit     hit
 50.2934 -bo      be
 50.2935 -boon    been
 50.2936 -borne   home
 50.2937 -bow     how
 50.2938 -bumbled humbled
 50.2939 -car     ear
 50.2940 -carnage carriage
 50.2941 -carne   came
 50.2942 -cast    east
 50.2943 -cat     cut
 50.2944 -cat     eat
 50.2945 -cheek   check
 50.2946 -clay    day
 50.2947 -coining coming
 50.2948 -comer   corner
 50.2949 -die     she
 50.2950 -docs    does
 50.2951 -ease    case
 50.2952 -fail    fall
 50.2953 -fee     he
 50.2954 -haying  having
 50.2955 -ho      he
 50.2956 -ho      who
 50.2957 -hut     but
 50.2958 -is      as
 50.2959 -lie     he
 50.2960 -lime    time
 50.2961 -loth    10th
 50.2962 -m       in
 50.2963 -modem   modern
 50.2964 -Ms      his
 50.2965 -ray     away
 50.2966 -ray     my
 50.2967 -ringer  finger
 50.2968 -ringers fingers
 50.2969 -rioted  noted
 50.2970 -tho     the
 50.2971 -tie     he
 50.2972 -tie     the
 50.2973 -tier    her
 50.2974 -tight   right
 50.2975 -tile    the
 50.2976 -tiling  thing
 50.2977 -tip     up
 50.2978 -tram    train
 50.2979 -tune    time
 50.2980 -u       "
 50.2981 -wen     well
 50.2982 -yon     you
 50.2983 -
 50.2984 -*********************************************************************/
 50.2985 -

    51.1 --- a/gutcheck/gutcheck.typ.in	Fri Jan 27 00:28:11 2012 +0000
    51.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    51.3 @@ -1,54 +0,0 @@
    51.4 -11
    51.5 -44
    51.6 -ms
    51.7 -ail
    51.8 -alien
    51.9 -arc
   51.10 -arid
   51.11 -bar
   51.12 -bat
   51.13 -bo
   51.14 -borne
   51.15 -bow
   51.16 -bum
   51.17 -bumbled
   51.18 -carnage
   51.19 -carne
   51.20 -cither
   51.21 -coining
   51.22 -comer
   51.23 -cur
   51.24 -docs
   51.25 -eve
   51.26 -eves
   51.27 -gaming
   51.28 -gram
   51.29 -guru
   51.30 -hag
   51.31 -hare
   51.32 -haying
   51.33 -ho
   51.34 -lime
   51.35 -loth
   51.36 -m
   51.37 -modem
   51.38 -nave
   51.39 -ringer
   51.40 -ringers
   51.41 -riot
   51.42 -rioted
   51.43 -signer
   51.44 -snore
   51.45 -spam
   51.46 -tho
   51.47 -tier
   51.48 -tile
   51.49 -tiling
   51.50 -tram
   51.51 -tum
   51.52 -tune
   51.53 -u
   51.54 -vas
   51.55 -wag
   51.56 -wen
   51.57 -yon

    52.1 --- a/test/compatibility/Makefile.am	Fri Jan 27 00:28:11 2012 +0000
    52.2 +++ b/test/compatibility/Makefile.am	Fri Jan 27 10:30:16 2012 +0000
    52.3 @@ -1,4 +1,4 @@
    52.4 -TESTS_ENVIRONMENT=GUTCHECK=../../gutcheck/gutcheck ../harness/gc-test
    52.5 +TESTS_ENVIRONMENT=BOOKLOUPE=../../bookloupe/bookloupe ../harness/loupe-test
    52.6  TESTS=missing-space.tst spaced-punctuation.tst html-tag.tst html-symbol.tst \
    52.7  	spaced-doublequote.tst mismatched-quotes.tst he-be.tst digits.tst \
    52.8  	extra-period.tst ellipsis.tst short-line.tst abbreviation.tst \

    53.1 --- a/test/harness/Makefile.am	Fri Jan 27 00:28:11 2012 +0000
    53.2 +++ b/test/harness/Makefile.am	Fri Jan 27 10:30:16 2012 +0000
    53.3 @@ -1,8 +1,8 @@
    53.4  INCLUDES=-I$(top_srcdir)
    53.5 -bin_PROGRAMS=gc-test
    53.6 +bin_PROGRAMS=loupe-test
    53.7  AM_CFLAGS=$(GLIB_CFLAGS)
    53.8  LIBS=$(GLIB_LIBS)
    53.9  
   53.10 -gc_test_SOURCES=gc-test.c testcase.c testcase.h testcaseio.c testcaseio.h \
   53.11 -	testcaseparser.c testcaseparser.h
   53.12 -gc_test_LDADD=../../gclib/libgc.la
   53.13 +loupe_test_SOURCES=loupe-test.c testcase.c testcase.h testcaseio.c \
   53.14 +	testcaseio.h testcaseparser.c testcaseparser.h
   53.15 +loupe_test_LDADD=../../bl/libbl.la

    54.1 --- a/test/harness/gc-test.c	Fri Jan 27 00:28:11 2012 +0000
    54.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    54.3 @@ -1,31 +0,0 @@
    54.4 -#include <stdlib.h>
    54.5 -#include <stdio.h>
    54.6 -#include <string.h>
    54.7 -#include <gclib/gclib.h>
    54.8 -#include "testcase.h"
    54.9 -#include "testcaseio.h"
   54.10 -
   54.11 -/*
   54.12 - * Returns FALSE if the test should be considered to have failed.
   54.13 - * (returns TRUE on pass or expected-fail).
   54.14 - */
   54.15 -boolean run_test(const char *filename)
   54.16 -{
   54.17 -    Testcase *testcase;
   54.18 -    boolean retval;
   54.19 -    testcase=testcase_parse_file(filename);
   54.20 -    if (!testcase)
   54.21 -	return FALSE;
   54.22 -    retval=testcase_run(testcase);
   54.23 -    testcase_free(testcase);
   54.24 -    return retval;
   54.25 -}
   54.26 -
   54.27 -int main(int argc,char **argv)
   54.28 -{
   54.29 -    int i;
   54.30 -    boolean pass=TRUE;
   54.31 -    for(i=1;i<argc;i++)
   54.32 -	pass&=run_test(argv[i]);
   54.33 -    return pass?0:1;
   54.34 -}

    55.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    55.2 +++ b/test/harness/loupe-test.c	Fri Jan 27 10:30:16 2012 +0000
    55.3 @@ -0,0 +1,31 @@
    55.4 +#include <stdlib.h>
    55.5 +#include <stdio.h>
    55.6 +#include <string.h>
    55.7 +#include <bl/bl.h>
    55.8 +#include "testcase.h"
    55.9 +#include "testcaseio.h"
   55.10 +
   55.11 +/*
   55.12 + * Returns FALSE if the test should be considered to have failed.
   55.13 + * (returns TRUE on pass or expected-fail).
   55.14 + */
   55.15 +boolean run_test(const char *filename)
   55.16 +{
   55.17 +    Testcase *testcase;
   55.18 +    boolean retval;
   55.19 +    testcase=testcase_parse_file(filename);
   55.20 +    if (!testcase)
   55.21 +	return FALSE;
   55.22 +    retval=testcase_run(testcase);
   55.23 +    testcase_free(testcase);
   55.24 +    return retval;
   55.25 +}
   55.26 +
   55.27 +int main(int argc,char **argv)
   55.28 +{
   55.29 +    int i;
   55.30 +    boolean pass=TRUE;
   55.31 +    for(i=1;i<argc;i++)
   55.32 +	pass&=run_test(argv[i]);
   55.33 +    return pass?0:1;
   55.34 +}

    56.1 --- a/test/harness/testcase.c	Fri Jan 27 00:28:11 2012 +0000
    56.2 +++ b/test/harness/testcase.c	Fri Jan 27 10:30:16 2012 +0000
    56.3 @@ -7,7 +7,7 @@
    56.4  #include <io.h>
    56.5  #endif
    56.6  #include <fcntl.h>
    56.7 -#include <gclib/gclib.h>
    56.8 +#include <bl/bl.h>
    56.9  #include "testcase.h"
   56.10  
   56.11  #if !HAVE_MKSTEMP
   56.12 @@ -124,9 +124,9 @@
   56.13  	return FALSE;
   56.14      }
   56.15      close(fd);
   56.16 -    command[0]=getenv("GUTCHECK");
   56.17 +    command[0]=getenv("BOOKLOUPE");
   56.18      if (!command[0])
   56.19 -	command[0]="." GC_DIR_SEPARATOR_S "gutcheck";
   56.20 +	command[0]="." BL_DIR_SEPARATOR_S "bookloupe";
   56.21      command[1]=input;
   56.22      command[2]=NULL;
   56.23      if (testcase->expected)
   56.24 @@ -157,7 +157,7 @@
   56.25  	fprintf(stderr,"%s: FAIL\n",testcase->basename);
   56.26  	offset=common_prefix_length(output,expected->str);
   56.27  	if (offset==header_len && !output[offset])
   56.28 -	    fprintf(stderr,"Unexpected zero warnings from gutcheck.\n");
   56.29 +	    fprintf(stderr,"Unexpected zero warnings from bookloupe.\n");
   56.30  	else
   56.31  	{
   56.32  	    endp=strchr(output+offset,'\n');
   56.33 @@ -171,7 +171,7 @@
   56.34  	    else
   56.35  		bol=report->str;
   56.36  	    col=offset-(bol-report->str);
   56.37 -	    fprintf(stderr,"Unexpected output from gutcheck:\n");
   56.38 +	    fprintf(stderr,"Unexpected output from bookloupe:\n");
   56.39  	    if (report->len>=header_len)
   56.40  		fprintf(stderr,"%s\n%*s^\n",report->str+header_len,col,"");
   56.41  	    else
   56.42 @@ -185,7 +185,7 @@
   56.43      string_free(expected,TRUE);
   56.44      mem_free(output);
   56.45      if (exit_status)
   56.46 -	fprintf(stderr,"gutcheck exited with code %d\n",r);
   56.47 +	fprintf(stderr,"bookloupe exited with code %d\n",r);
   56.48      if (!exit_status)
   56.49  	fprintf(stderr,"%s: PASS\n",testcase->basename);
   56.50      return !exit_status;

    57.1 --- a/test/harness/testcaseio.c	Fri Jan 27 00:28:11 2012 +0000
    57.2 +++ b/test/harness/testcaseio.c	Fri Jan 27 10:30:16 2012 +0000
    57.3 @@ -1,7 +1,7 @@
    57.4  #include <stdlib.h>
    57.5  #include <stdio.h>
    57.6  #include <string.h>
    57.7 -#include <gclib/gclib.h>
    57.8 +#include <bl/bl.h>
    57.9  #include "testcaseparser.h"
   57.10  #include "testcaseio.h"
   57.11  

    58.1 --- a/test/harness/testcaseparser.c	Fri Jan 27 00:28:11 2012 +0000
    58.2 +++ b/test/harness/testcaseparser.c	Fri Jan 27 10:30:16 2012 +0000
    58.3 @@ -2,7 +2,7 @@
    58.4  #include <stdio.h>
    58.5  #include <string.h>
    58.6  #include <ctype.h>
    58.7 -#include <gclib/gclib.h>
    58.8 +#include <bl/bl.h>
    58.9  #include "testcaseparser.h"
   58.10  
   58.11  /*

    59.1 --- a/test/harness/testcaseparser.h	Fri Jan 27 00:28:11 2012 +0000
    59.2 +++ b/test/harness/testcaseparser.h	Fri Jan 27 10:30:16 2012 +0000
    59.3 @@ -1,7 +1,7 @@
    59.4  #ifndef TESTCASE_PARSER_H
    59.5  #define TESTCASE_PARSER_H
    59.6  
    59.7 -#include <gclib/gclib.h>
    59.8 +#include <bl/bl.h>
    59.9  
   59.10  typedef struct {
   59.11      char *filename;
author	ali <ali@juiblex.co.uk>
	Fri Jan 27 10:30:16 2012 +0000 (2012-01-27)
changeset 5	f600b0d1fc5d
parent 4	218904410231
child 6	faab25d520dd
.hgignore		file \| annotate \| diff \| revisions
Makefile.am		file \| annotate \| diff \| revisions
README		file \| annotate \| diff \| revisions
bl/Makefile.am		file \| annotate \| diff \| revisions
bl/bl.h		file \| annotate \| diff \| revisions
bl/blstring.c		file \| annotate \| diff \| revisions
bl/blstring.h		file \| annotate \| diff \| revisions
bl/fileutils.c		file \| annotate \| diff \| revisions
bl/fileutils.h		file \| annotate \| diff \| revisions
bl/macros.h		file \| annotate \| diff \| revisions
bl/mem.c		file \| annotate \| diff \| revisions
bl/mem.h		file \| annotate \| diff \| revisions
bl/spawn.c		file \| annotate \| diff \| revisions
bl/spawn.h		file \| annotate \| diff \| revisions
bl/strfuncs.c		file \| annotate \| diff \| revisions
bl/strfuncs.h		file \| annotate \| diff \| revisions
bl/textfileutils.c		file \| annotate \| diff \| revisions
bl/textfileutils.h		file \| annotate \| diff \| revisions
bl/types.h		file \| annotate \| diff \| revisions
bl/utils.c		file \| annotate \| diff \| revisions
bl/utils.h		file \| annotate \| diff \| revisions
bookloupe/Makefile.am		file \| annotate \| diff \| revisions
bookloupe/bookloupe.c		file \| annotate \| diff \| revisions
bookloupe/bookloupe.typ.in		file \| annotate \| diff \| revisions
configure.ac		file \| annotate \| diff \| revisions
doc/Makefile.am		file \| annotate \| diff \| revisions
doc/bookloupe.txt		file \| annotate \| diff \| revisions
doc/gc-test.txt		file \| annotate \| diff \| revisions
doc/gutcheck.txt		file \| annotate \| diff \| revisions
doc/loupe-test.txt		file \| annotate \| diff \| revisions
gclib/Makefile.am		file \| annotate \| diff \| revisions
gclib/fileutils.c		file \| annotate \| diff \| revisions
gclib/fileutils.h		file \| annotate \| diff \| revisions
gclib/gclib.h		file \| annotate \| diff \| revisions
gclib/gcstring.c		file \| annotate \| diff \| revisions
gclib/gcstring.h		file \| annotate \| diff \| revisions
gclib/macros.h		file \| annotate \| diff \| revisions
gclib/mem.c		file \| annotate \| diff \| revisions
gclib/mem.h		file \| annotate \| diff \| revisions
gclib/spawn.c		file \| annotate \| diff \| revisions
gclib/spawn.h		file \| annotate \| diff \| revisions
gclib/strfuncs.c		file \| annotate \| diff \| revisions
gclib/strfuncs.h		file \| annotate \| diff \| revisions
gclib/textfileutils.c		file \| annotate \| diff \| revisions
gclib/textfileutils.h		file \| annotate \| diff \| revisions
gclib/types.h		file \| annotate \| diff \| revisions
gclib/utils.c		file \| annotate \| diff \| revisions
gclib/utils.h		file \| annotate \| diff \| revisions
gutcheck/Makefile.am		file \| annotate \| diff \| revisions
gutcheck/gutcheck.c		file \| annotate \| diff \| revisions
gutcheck/gutcheck.typ.in		file \| annotate \| diff \| revisions
test/compatibility/Makefile.am		file \| annotate \| diff \| revisions
test/harness/Makefile.am		file \| annotate \| diff \| revisions
test/harness/gc-test.c		file \| annotate \| diff \| revisions
test/harness/loupe-test.c		file \| annotate \| diff \| revisions
test/harness/testcase.c		file \| annotate \| diff \| revisions
test/harness/testcaseio.c		file \| annotate \| diff \| revisions
test/harness/testcaseparser.c		file \| annotate \| diff \| revisions
test/harness/testcaseparser.h		file \| annotate \| diff \| revisions