# HG changeset patch
# User ali <ali@juiblex.co.uk>
# Date 1327449245 0
# Node ID c2f4c0285180dc21526610b47f50248c76ed5f3e

Initial version

diff -r 000000000000 -r c2f4c0285180 .hgignore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.hgignore	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,22 @@
+gutcheck-.*\.tar\.gz
+gutcheck-.*/
+Makefile$
+Makefile\.in
+aclocal\.m4
+libtool
+stamp-h1
+autom4te\.cache
+config\.log
+config\.status
+config/
+configure
+\.deps/
+\.libs/
+\..*\.swp
+.*\.o
+.*\.la
+.*\.lo
+.*\.exe
+gutcheck/gutcheck\.typ
+gutcheck/gutcheck
+test/harness/gc-test
diff -r 000000000000 -r c2f4c0285180 COPYING
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/COPYING	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff -r 000000000000 -r c2f4c0285180 INSTALL
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/INSTALL	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,365 @@
+Installation Instructions
+*************************
+
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
+2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+
+   Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.  This file is offered as-is,
+without warranty of any kind.
+
+Basic Installation
+==================
+
+   Briefly, the shell commands `./configure; make; make install' should
+configure, build, and install this package.  The following
+more-detailed instructions are generic; see the `README' file for
+instructions specific to this package.  Some packages provide this
+`INSTALL' file but do not implement all of the features documented
+below.  The lack of an optional feature in a given package is not
+necessarily a bug.  More recommendations for GNU packages can be found
+in *note Makefile Conventions: (standards)Makefile Conventions.
+
+   The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation.  It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions.  Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+
+   It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring.  Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.
+
+   If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release.  If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+
+   The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'.  You need `configure.ac' if
+you want to change it or regenerate `configure' using a newer version
+of `autoconf'.
+
+   The simplest way to compile this package is:
+
+  1. `cd' to the directory containing the package's source code and type
+     `./configure' to configure the package for your system.
+
+     Running `configure' might take a while.  While running, it prints
+     some messages telling which features it is checking for.
+
+  2. Type `make' to compile the package.
+
+  3. Optionally, type `make check' to run any self-tests that come with
+     the package, generally using the just-built uninstalled binaries.
+
+  4. Type `make install' to install the programs and any data files and
+     documentation.  When installing into a prefix owned by root, it is
+     recommended that the package be configured and built as a regular
+     user, and only the `make install' phase executed with root
+     privileges.
+
+  5. Optionally, type `make installcheck' to repeat any self-tests, but
+     this time using the binaries in their final installed location.
+     This target does not install anything.  Running this target as a
+     regular user, particularly if the prior `make install' required
+     root privileges, verifies that the installation completed
+     correctly.
+
+  6. You can remove the program binaries and object files from the
+     source code directory by typing `make clean'.  To also remove the
+     files that `configure' created (so you can compile the package for
+     a different kind of computer), type `make distclean'.  There is
+     also a `make maintainer-clean' target, but that is intended mainly
+     for the package's developers.  If you use it, you may have to get
+     all sorts of other programs in order to regenerate files that came
+     with the distribution.
+
+  7. Often, you can also type `make uninstall' to remove the installed
+     files again.  In practice, not all packages have tested that
+     uninstallation works correctly, even though it is required by the
+     GNU Coding Standards.
+
+  8. Some packages, particularly those that use Automake, provide `make
+     distcheck', which can be used by developers to test that all other
+     targets like `make install' and `make uninstall' work correctly.
+     This target is generally not run by end users.
+
+Compilers and Options
+=====================
+
+   Some systems require unusual options for compilation or linking that
+the `configure' script does not know about.  Run `./configure --help'
+for details on some of the pertinent environment variables.
+
+   You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment.  Here
+is an example:
+
+     ./configure CC=c99 CFLAGS=-g LIBS=-lposix
+
+   *Note Defining Variables::, for more details.
+
+Compiling For Multiple Architectures
+====================================
+
+   You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory.  To do this, you can use GNU `make'.  `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script.  `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.  This
+is known as a "VPATH" build.
+
+   With a non-GNU `make', it is safer to compile the package for one
+architecture at a time in the source code directory.  After you have
+installed the package for one architecture, use `make distclean' before
+reconfiguring for another architecture.
+
+   On MacOS X 10.5 and later systems, you can create libraries and
+executables that work on multiple system types--known as "fat" or
+"universal" binaries--by specifying multiple `-arch' options to the
+compiler but only a single `-arch' option to the preprocessor.  Like
+this:
+
+     ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
+                 CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
+                 CPP="gcc -E" CXXCPP="g++ -E"
+
+   This is not guaranteed to produce working output in all cases; you
+may have to build one architecture at a time and combine the results
+using the `lipo' tool if you have problems.
+
+Installation Names
+==================
+
+   By default, `make install' installs the package's commands under
+`/usr/local/bin', include files under `/usr/local/include', etc.  You
+can specify an installation prefix other than `/usr/local' by giving
+`configure' the option `--prefix=PREFIX', where PREFIX must be an
+absolute file name.
+
+   You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files.  If you
+pass the option `--exec-prefix=PREFIX' to `configure', the package uses
+PREFIX as the prefix for installing programs and libraries.
+Documentation and other data files still use the regular prefix.
+
+   In addition, if you use an unusual directory layout you can give
+options like `--bindir=DIR' to specify different values for particular
+kinds of files.  Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.  In general, the
+default for these options is expressed in terms of `${prefix}', so that
+specifying just `--prefix' will affect all of the other directory
+specifications that were not explicitly provided.
+
+   The most portable way to affect installation locations is to pass the
+correct locations to `configure'; however, many packages provide one or
+both of the following shortcuts of passing variable assignments to the
+`make install' command line to change installation locations without
+having to reconfigure or recompile.
+
+   The first method involves providing an override variable for each
+affected directory.  For example, `make install
+prefix=/alternate/directory' will choose an alternate location for all
+directory configuration variables that were expressed in terms of
+`${prefix}'.  Any directories that were specified during `configure',
+but not in terms of `${prefix}', must each be overridden at install
+time for the entire installation to be relocated.  The approach of
+makefile variable overrides for each directory variable is required by
+the GNU Coding Standards, and ideally causes no recompilation.
+However, some platforms have known limitations with the semantics of
+shared libraries that end up requiring recompilation when using this
+method, particularly noticeable in packages that use GNU Libtool.
+
+   The second method involves providing the `DESTDIR' variable.  For
+example, `make install DESTDIR=/alternate/directory' will prepend
+`/alternate/directory' before all installation names.  The approach of
+`DESTDIR' overrides is not required by the GNU Coding Standards, and
+does not work on platforms that have drive letters.  On the other hand,
+it does better at avoiding recompilation issues, and works well even
+when some directory options were not specified in terms of `${prefix}'
+at `configure' time.
+
+Optional Features
+=================
+
+   If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+   Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System).  The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+   For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+   Some packages offer the ability to configure how verbose the
+execution of `make' will be.  For these packages, running `./configure
+--enable-silent-rules' sets the default to minimal output, which can be
+overridden with `make V=1'; while running `./configure
+--disable-silent-rules' sets the default to verbose, which can be
+overridden with `make V=0'.
+
+Particular systems
+==================
+
+   On HP-UX, the default C compiler is not ANSI C compatible.  If GNU
+CC is not installed, it is recommended to use the following options in
+order to use an ANSI C compiler:
+
+     ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
+
+and if that doesn't work, install pre-built binaries of GCC for HP-UX.
+
+   On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
+parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
+a workaround.  If GNU CC is not installed, it is therefore recommended
+to try
+
+     ./configure CC="cc"
+
+and if that doesn't work, try
+
+     ./configure CC="cc -nodtk"
+
+   On Solaris, don't put `/usr/ucb' early in your `PATH'.  This
+directory contains several dysfunctional programs; working variants of
+these programs are available in `/usr/bin'.  So, if you need `/usr/ucb'
+in your `PATH', put it _after_ `/usr/bin'.
+
+   On Haiku, software installed for all users goes in `/boot/common',
+not `/usr/local'.  It is recommended to use the following options:
+
+     ./configure --prefix=/boot/common
+
+Specifying the System Type
+==========================
+
+   There may be some features `configure' cannot figure out
+automatically, but needs to determine by the type of machine the package
+will run on.  Usually, assuming the package is built to be run on the
+_same_ architectures, `configure' can figure that out, but if it prints
+a message saying it cannot guess the machine type, give it the
+`--build=TYPE' option.  TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+     CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+     OS
+     KERNEL-OS
+
+   See the file `config.sub' for the possible values of each field.  If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+   If you are _building_ compiler tools for cross-compiling, you should
+use the option `--target=TYPE' to select the type of system they will
+produce code for.
+
+   If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+   If you want to set default values for `configure' scripts to share,
+you can create a site shell script called `config.site' that gives
+default values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists.  Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+   Variables not defined in a site shell script can be set in the
+environment passed to `configure'.  However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost.  In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'.  For example:
+
+     ./configure CC=/usr/local2/bin/gcc
+
+causes the specified `gcc' to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+Unfortunately, this technique does not work for `CONFIG_SHELL' due to
+an Autoconf bug.  Until the bug is fixed you can use this workaround:
+
+     CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
+
+`configure' Invocation
+======================
+
+   `configure' recognizes the following options to control how it
+operates.
+
+`--help'
+`-h'
+     Print a summary of all of the options to `configure', and exit.
+
+`--help=short'
+`--help=recursive'
+     Print a summary of the options unique to this package's
+     `configure', and exit.  The `short' variant lists options used
+     only in the top level, while the `recursive' variant lists options
+     also present in any nested packages.
+
+`--version'
+`-V'
+     Print the version of Autoconf used to generate the `configure'
+     script, and exit.
+
+`--cache-file=FILE'
+     Enable the cache: use and save the results of the tests in FILE,
+     traditionally `config.cache'.  FILE defaults to `/dev/null' to
+     disable caching.
+
+`--config-cache'
+`-C'
+     Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+     Do not print messages saying which checks are being made.  To
+     suppress all normal output, redirect it to `/dev/null' (any error
+     messages will still be shown).
+
+`--srcdir=DIR'
+     Look for the package's source code in directory DIR.  Usually
+     `configure' can determine that directory automatically.
+
+`--prefix=DIR'
+     Use DIR as the installation prefix.  *note Installation Names::
+     for more details, including other options available for fine-tuning
+     the installation locations.
+
+`--no-create'
+`-n'
+     Run the configure checks, but stop before creating any output
+     files.
+
+`configure' also accepts some other, not widely useful, options.  Run
+`configure --help' for more details.
+
diff -r 000000000000 -r c2f4c0285180 Makefile.am
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,1 @@
+SUBDIRS=gclib gutcheck test doc
diff -r 000000000000 -r c2f4c0285180 README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,68 @@
+                                   gutcheck
+                                   ========
+
+General installation instructions can be found in INSTALL. The following
+aim to give a quick overview and some help for specific systems. Documentation
+for gutcheck itself can be found in doc/gutcheck.txt and for the test
+framework in doc/gc-test.txt.
+
+Linux
+-----
+
+You should be able to use the standard:
+
+% ./configure
+% make
+% sudo make install
+
+If you get an error about no package 'glib-2.0' found, then you need to
+install the development package for glib2. Under Fedora, RHEL and friends
+that would be:
+
+% sudo yum install gcc pkgconfig glib2-devel
+
+Under Debian, Ubuntu and friends that would be:
+
+% sudo apt-get install gcc pkg-config libglib2.0-dev
+
+If you get really stuck, you can use the --without-glib option to configure,
+but this may well not be supported in a future version so this is probably
+best avoided.
+
+Microsoft Windows
+-----------------
+
+It should be possible to use MSYS (http://www.mingw.org/wiki/MSYS) to build
+on a Windows machine. You'll need a copy of the development package for
+glib and its dependencies from http://www.gtk.org/download/win32.php.
+
+It's much easier to build using a cross-compiler from Linux, if you have
+access to such a system. Under Fedora, RHEL and friends you can do this
+with:
+
+% sudo yum install mingw32-gcc pkgconfig mingw32-glib2-static \
+  mingw32-gettext-static mingw32-iconv-static
+% ./configure --host=i686-w64-mingw32 --disable-shared \
+  --bindir=/gutcheck --datadir=/
+% make
+% mkdir build
+% make install DESTDIR=`pwd`/build
+
+The contents of the build/gutcheck directory can then be copied to a
+Microsoft Windows machine.
+
+Depending on the version of mingw32-gcc you use, you may need to specify a
+different host type. If you're not sure look and see what the cross-compiler
+is called (eg., i686-pc-mingw32-gcc) and use the prefix as the host type.
+
+Mac
+---
+
+I think this should be quite similar to Linux, doing something like this:
+
+% sudo port install gcc pkgconfig glib2-devel
+% ./configure
+% make
+% sudo make install
+
+It may also be possible to use fink instead of macports.
diff -r 000000000000 -r c2f4c0285180 bootstrap.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bootstrap.sh	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,6 @@
+#!/bin/sh
+mkdir -p config
+aclocal && \
+  libtoolize && \
+  automake --foreign --add-missing && \
+  autoconf
diff -r 000000000000 -r c2f4c0285180 configure.ac
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/configure.ac	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,94 @@
+#                                               -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_INIT([gutcheck],[1.50],[ali@juiblex.co.uk])
+AC_PREREQ(2.59)
+AC_CONFIG_AUX_DIR([config])
+AC_CONFIG_SRCDIR([gutcheck/gutcheck.c])
+AC_CONFIG_FILES([Makefile
+gclib/Makefile
+gutcheck/Makefile
+test/Makefile
+test/harness/Makefile
+test/compatibility/Makefile
+doc/Makefile
+])
+AM_INIT_AUTOMAKE(no-define)
+AC_CANONICAL_HOST
+
+##################################################
+# Checks for programs.
+##################################################
+AC_PROG_CC
+LT_INIT
+# Libtool supports a --disable-shared option to tell it to avoid
+# building shared versions of libraries. We don't have any libraries
+# but we do want to support building static versions of our executables.
+# Libtool can do this (under the right circumstances) so we overload
+# this switch for this purpose.
+#
+# The libtool option that we use (-static-libtool-libs) means to use
+# static linking with libraries that supply a .la file and which
+# include a non-empty value for "old_library". If the library doesn't
+# include a .la file (they are deleted by some distributions), then
+# this option will have no effect and likewise if old_library is set
+# to '' (eg., if the library was built with --disable-static) then
+# again -static-libtool-libs will have no effect.
+#
+# If old_library is set to a non-empty value, then specifying
+# -static-libtool-libs will cause the link to fail if the old library
+# cannot be found (libtool will not fall back to a shared library
+# in these circumstances). This can happen with Fedora, for example,
+# if a main mingw32 library package is installed but not the
+# corresponding static sub-package. The solution is to either
+# install the relevant static sub-packages or don't use --disable-shared.
+AS_IF([test "$enable_shared" = no],[
+  LDFLAGS="$LDFLAGS -static-libtool-libs"
+])
+PKG_PROG_PKG_CONFIG
+
+##################################################
+# Checks for header files.
+##################################################
+
+##################################################
+# Checks for typedefs, structures, and compiler characteristics.
+##################################################
+
+##################################################
+# Checks for libraries.
+##################################################
+AC_MSG_CHECKING([whether to use glib])
+AC_ARG_WITH([glib],[AS_HELP_STRING([--without-glib],
+  [use internal re-invented wheel rather than glib2])])
+AS_IF([test "$with_glib" != no],[
+  AC_MSG_RESULT([yes])
+  PKG_CHECK_MODULES([GLIB],[glib-2.0])
+  AC_DEFINE([HAVE_GLIB],[1],[Define if you have glib version 2.x])
+],[
+  AC_MSG_RESULT([no])
+])
+AM_CONDITIONAL([HAVE_GLIB],[test "$with_glib" != no])
+
+# NOTE: If we are using a static version of glib then we
+# should define GLIB_STATIC_COMPILATION. This isn't needed
+# when glib is built only for static use (in which case
+# glibconfig.h will already define GLIB_STATIC_COMPILATION).
+# It's not easy to tell if libtool will actually link with
+# a static glib but luckily we don't currently need to;
+# this pre-processor define only affects the behaviour of
+# libraries which use glib and we don't have any.
+
+##################################################
+# Checks for library functions.
+##################################################
+AC_CHECK_FUNCS_ONCE([mkstemp])
+
+##################################################
+# Checks for processor independent files.
+##################################################
+
+##################################################
+# Generate the various configured files
+##################################################
+AC_OUTPUT
diff -r 000000000000 -r c2f4c0285180 doc/Makefile.am
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,3 @@
+dist_pkgdata_DATA=gutcheck.txt gc-test.txt
+
+EXTRA_DIST=README-0.99
diff -r 000000000000 -r c2f4c0285180 doc/README-0.99
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/README-0.99	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,24 @@
+RELEASE NOTES FOR GUTCHECK 0.99 20051105
+----------------------------------------
+
+This is the README file for Gutcheck.
+
+Gutcheck is a command-line tool for finding problems in
+files for submission to Project Gutenberg.
+
+You should have received the following files:
+
+    GUTCHECK.EXE    MS-DOS Executable
+    gutcheck.txt    Documentation
+    gutcheck.c      Source code
+    gutcheck.typ    A sample typo file
+    README          This file
+    COPYING         A copy of the GNU GPL licence
+
+This program is free software, without warranty of any kind,
+licensed under the GNU GPL.  A copy of the GNU GPL, entitled
+'COPYING' should be present.  If not, you can find one at 
+http://www.fsf.org.
+
+Gutcheck was written by Jim Tinsley, who can be reached at 
+jtinsley@pobox.com, or via http://gutcheck.sourceforge.net.
diff -r 000000000000 -r c2f4c0285180 doc/gc-test.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/gc-test.txt	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,64 @@
+                            gutcheck test framework
+                            =======================
+
+Running existing testcases
+--------------------------
+
+The test harness (the program that runs a test) is called gc-test. The various
+testcases are stored in multiple text files, typically with a .tst extension.
+
+To run a testcase when all of gutcheck, gc-test and the testcase file are
+in the current directory simply do something like:
+
+% gc-test missing-space.tst
+
+from a command prompt. Under MS-Windows, this is called a command window and
+the prompt will normally look slightly different, eg.,
+
+C:\DP> gc-test missing-space.tst
+
+To run all the tests in the current directory, do something like this:
+
+% gc-test *.tst
+
+If gutcheck is not in the current directory, then you can set an environment
+variable (GUTCHECK) to point at it. For example, on MS-Windows you might do:
+
+C:\DP> set GUTCHECK=C:\GUTCHECK\GUTCHECK.EXE
+C:\DP> gc-test *.tst
+
+Writing your own testcases
+--------------------------
+
+Writing a new testcase is pretty painless. Most testcases follow this simple
+pattern:
+
+		┌──────────────────────────────────────────┐
+		│**************** INPUT ****************   │
+		│"Look!John, over there!"                  │
+		│**************** EXPECTED ****************│
+		│                                          │
+		│"Look!John, over there!"                  │
+		│    Line 1 column 6 - Missing space?      │
+		└──────────────────────────────────────────┘
+
+The sixteen asterisks in this example form what is known as the "flag". This
+flag must come before and after all tags (eg., INPUT and EXPECTED). In the
+unlikely event that you need sixteen asterisks at the start of a line of text,
+then simply choose a different flag and use it throughout the file (flags
+can be any sequence of ASCII characters except control codes and space).
+
+Note that the header that gutcheck normally outputs is not included in the
+expected output. This avoids problems with not knowing beforehand the name
+of the file that gutcheck will be asked to look at (and saves typing!).
+gutcheck prints a blank line before each warning. These are not part of the
+header and so do need to be included.
+
+To test that gutcheck produces no output, you still need to include
+an EXPECTED tag, just with no text following it. If there is no EXPECTED
+tag, then gc-test will consider that no expectation exists and won't check
+the output at all.
+
+There is no support yet for non-ASCII testcases, embedded linefeeds,
+passing command line options to gutcheck or for testcases which are
+expected to fail.
diff -r 000000000000 -r c2f4c0285180 doc/gutcheck.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/doc/gutcheck.txt	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,742 @@
+
+
+                            Gutcheck documentation
+
+
+gutcheck:  lists possible common formatting errors in a Project
+Gutenberg candidate file. It is a command line program and can be used
+under Win32 or Unix (gutcheck.c should compile anywhere; if it doesn't,
+tell me). For Windows-only people, there is an appendix at the end
+with brief instructions for running it.
+
+
+Current version: 0.99. Users of 0.98 see end of file for changes.
+
+You should also have received the licence file COPYING, a README file, 
+gutcheck.c, the source code, and gutcheck.exe, a DOS executable, with
+this file.
+
+This software is Copyright Jim Tinsley 2000-2005.
+
+Gutcheck comes with ABSOLUTELY NO WARRANTY. For details, read the file COPYING.
+This is Free Software; you may redistribute it under certain conditions (GPL).
+
+See http://gutcheck.sourceforge.net for the latest version.
+
+
+Usage is: gutcheck [-setopxlywvmudh] filename
+      where:
+      -s checks Single quotes 
+      -e switches off Echoing of lines 
+      -t checks Typos
+      -o produces an Overview only
+      -p sets strict quotes checking for Paragraphs
+      -x (paranoid) switches OFF typo checking and extra checks
+      -l turns off Line-end checks
+      -y sets error messages to stdout
+      -w is a special mode for web uploads (for future use)
+      -v (verbose) forces individual reporting of minor problems
+      -m interprets Markup of some common HTML tags and entities    
+      -u warns about words in a user-defined typo file gutcheck.typ 
+      -d ignores some DP-specific markup
+
+Running gutcheck without any parameters will display a brief help message.
+
+Sample usage: 
+
+    gutcheck warpeace.txt
+
+
+More detail:
+
+    Echoing lines (-e to switch off)
+
+      You may find it convenient, when reviewing Gutcheck's 
+      suggestions, to see the line that Gutcheck is questioning.
+      That way, you can often see at a glance whether it is
+      a real error that needs to be fixed, or a false positive
+      that should be in the text, but Gutcheck's limited
+      programming doesn't understand.
+
+      By default, gutcheck echoes these lines, but if you don't 
+      want to see the lines referred to, -e will switch it OFF.
+
+
+    Quotes (-s and -p switches)
+
+      Gutcheck always looks for unbalanced doublequotes in a 
+      paragraph. It is a common convention for writers not to
+      close quotes in a paragraph if the next paragraph opens
+      with quotes and is a continuation by the same speaker.
+
+      Gutcheck therefore does not normally report unclosed quotes 
+      if the next paragraph begins with a quote. If you need
+      to see all unclosed quotes, even where the next paragraph
+      begins with a quote, you should use the -p switch.
+
+      Singlequotes (') are a problem, since the same character
+      is used for an apostrophe. I'm not sure that it is 
+      possible to get 100% accuracy on singlequotes checking,
+      particularly since dialect, quite common in PG texts,
+      upsets the normal rules so badly. Consider the sentence:
+        'Tis often said that a man's a man for a' that.
+      As humans, we recognize that both apostrophes are used
+      for contractions rather than quotes, but it isn't easy 
+      to get a program to recognize that.
+
+      Since Gutcheck makes too many mistakes when trying to match
+      singlequotes, it doesn't look for unbalanced singlequotes
+      unless you specify the -s switch.
+
+      Consider these sentences, which illustrate the main cases:
+
+        'Tis often said that a fool and his money are soon parted.
+
+        'Becky's goin' home,' said Tom.
+
+        The dogs' tails wagged in unison.
+
+        Those 'pack dogs' of yours look more like wolves.
+
+
+
+    Typos (-t switch)
+
+      It's not Gutcheck's job to be a spelling checker, but it
+      does check for a list of common typos and OCR errors if you
+      use the -t switch. (The -x switch turns typo checking off.)
+
+      It also checks for character combinations, especially involving
+      h and b, which are often confused by OCR, that rarely or never
+      occur. For example, it queries "tbe" in a word. Now, "the" often
+      occurs, but "tbe" is very rare (heartbeat, hotbed), so I'm
+      playing the odds - a few false positives for many errors found.
+      Similarly with "ii", which is a very common OCR error.
+
+      Gutcheck suppresses multiple reporting of the first 40 "typos"
+      found. This is to remove the annoyance of seeing something like
+      "FN" (footnote) or "LK" (initials) flagged as a typo 147 times
+      in a text. 
+
+
+    Line-end checking (-l switch to disable)
+
+      All PG texts should have a Carriage Return (CR - character 13)
+      and a Line Feed (LF - character 10) at end of each line,
+      regardless of what O/S you made them on. DOS/Windows, Unix
+      and Mac have different conventions, but the final text should
+      always use a CR/LF pair as its line terminator.
+
+      By default, Gutcheck verifies that every line does have
+      the correct terminator, but if you're on a work-in-progress
+      in Linux, you might want to convert the line-ends as a final
+      step, and not want to see thousands of errors every time you
+      run Gutcheck before that final step, so you can turn off 
+      this checking with the -l switch.
+
+
+    Paranoid mode (-x switch to disable: Trust No One :-)
+
+      -x automatically switches OFF typo-checking (the -t flag)
+      and some extra checks like standalone 1 and 0 queries.
+
+
+    Overview mode (-o switch)
+
+       This mode just gives a count of queries found
+       instead of a detailed list.
+
+
+    Header quote  (-h switch)
+
+       If you use the -h switch, gutcheck will also display
+       the Title, Author, Release and Edition fields from the
+       PG header. This is useful mostly for the automated
+       checks we do on recently-posted texts.
+
+
+    Errors to stdout (-y switch)
+
+       If you're just running gutcheck normally, you can ignore
+       this. It's only there for programs that provide a front
+       end to gutcheck. It makes error messages appear within
+       the output of gutcheck so that the front end knows whether
+       gutcheck ran OK.
+
+
+    Verbose reporting (-v switch)
+
+       Normally, if gutcheck sees lots of long lines, short lines,
+       spaced dashes, non-ASCII characters or dot-commas ".," it
+       assumes these are features of the text, counts and summarizes
+       them at the top of its report, but does not list them 
+       individually. If the -v switch is on, gutcheck will list them all.
+
+
+    Markup interpretation (-m switch)
+
+       Normally, gutcheck flags anything it suspects of being HTML
+       markup as a possible error. When you use the -m switch,
+       however, it matches anything that looks like markup against
+       a short list of common HTML tags and entities. If the markup
+       is in that list, it either ignores the markup, in the case
+       of a tag, or "interprets" the markup as its nearest ASCII 
+       equivalent, in the case of an entity. So, for example, using
+       this switch, gutcheck will "see"
+
+       &ldquo;He went <i>thataway!</i>&rdquo;
+
+       as
+
+       "He went thataway!"
+
+       and report accordingly.
+
+       This switch does not, not, NOT check the validity of HTML;
+       it exists so that you can run gutcheck on most HTML texts
+       for PG, and get sane results. It does not support all tags.
+       It does not support all entities. When it sees a tag or entity
+       it does not recognize, it will query it as HTML just as if
+       you hadn't specified the -m switch.
+
+       Gutcheck 0.99 will automatically switch on markup interpretation
+       if it sees a lot of tags that appear to be markup, so mostly, you
+       won't have to specify this.
+
+    User-defined typos (-u switch)
+
+        If you have a file named gutcheck.typ either in your current
+        working directory or in the directory from which you explicitly
+        invoked gutcheck, but not necessarily on your path, and if you
+        specify the -u switch, gutcheck will query any word specified 
+        in that file. The file is simple: one word, in lower case, per
+        line. 999 lines are allowed for. Be careful not to put multiple
+        words onto a line, or leave any rubbish other than the word on
+        the line. You should have received a sample file gutcheck.typ
+        with this package.
+
+    Ignore DP markup (-d switch)
+        
+        Distributed Proofreaders (http://www.pgdp.net) is currently
+        (2005) the main source of PG texts, and proofers there use
+        special conventions. This switch understands those conventions,
+        so that people can use gutcheck on files in process that still
+        haven't had the special conventions removed yet. The special
+        conventions supported in 0.99 are page-separators and
+        "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/".
+
+
+You will probably only run gutcheck on a text once or maybe twice,
+just prior to uploading; it usually finds a few formatting problems;
+it also usually finds queries that aren't problems at all - it often
+questions Tables of Contents for having short lines, for example.
+These are called "false positives", and need a human to decide on
+them.
+
+The text should be standard prose, and already close to PG normal
+format (plain text, about 70 characters per line with blank lines
+between paragraphs).
+
+Gutcheck merely draws your attention to things that might be errors.
+It is NOT a substitute for human judgement. Formatting choices like
+short lines may be for a reason that this program can't understand.
+
+Even the most careful human proofing can leave errors behind in a
+text, and there are several automated checks you can do to help find
+them. Of these, spellchecking (with _very_ careful human judgement) is
+the most important and most useful.
+
+Gutcheck does perform some basic typo-checking if you ask it to,
+but its focus is on formatting errors specific to PG texts - 
+mismatched quotes, non-ASCII characters, bad spacing, bad line
+length, HTML tags perhaps left from a conversion, unbalanced
+brackets.
+
+Suggestions for additional checks would be appreciated and duly 
+considered, but no guarantees that they will be implemented.
+
+
+
+
+                How do _I_ use it?
+
+Practically everyone I give gutcheck to asks me how _I_ use it.
+Well, when I get a text for posting, say filename.txt, I run
+
+    gutcheck -o filename.txt
+
+That gives me a quick idea what I'm dealing with. It'll tell
+me what kind of problems gutcheck sees, and give me an idea 
+of how much more work needs to be done on the text. Keep in 
+mind that gutcheck doesn't do anything like a full spellcheck,
+but when I see a text that has a lot of problems, I assume that
+it probably needs a spellcheck too.
+
+Having got a feel for the ballpark, I run
+
+    gutcheck filename.txt > jj
+
+where jj is my personal, all-purpose filename for temporary data
+that doesn't need to be kept. Then I open filename.txt and jj in
+a split-screen view in my editor, and work down the text, fixing
+whatever needs fixing, and skipping whatever doesn't. If your 
+editor doesn't split-screen, you can get much the same effect by 
+opening your original file in your normal editor, and jj (or your
+equivalent name) in something like Notepad, keeping both in view 
+at the same time.
+
+Twice a day, an automatic process looks at all recently-posted
+texts, and emails Michael, me, and sometimes other people with
+their gutcheck summaries.
+
+
+
+        Future development of gutcheck
+
+Gutcheck has gone about as far as it can, given its current
+structure. In order to add better singlequotes checking,
+sentence checking, better he/be checking and other good stuff
+that I'd like to see, I'll have to rewrite it from a different
+angle - looking at the syntax instead of the lines. And I'll
+probably get around to that sooner or later.
+
+Meantime, I'm just trying to get this version stabilized, so
+please report any bugs you find. When it is stable, I'll run
+up a Windows port for those timid souls who can't look a 
+command line in the eye. :-)
+
+And I've started work on gutspell, a companion to gutcheck
+which will concentrate on spelling problems. PG spelling
+problems are unusual, since the range of texts we cover is
+so wide, and I'll be taking a somewhat unorthodox approach
+to writing this spelling-checker _specifically_ for texts
+containing a lot of dialect and uncommon words that have
+probably already been spell-checked against a standard
+modern dictionary.
+
+
+
+
+Explanations of common gutcheck messages:
+
+    --> 74 lines in this file have white space at end
+
+    PG texts shouldn't have extra white space added at end of line.
+    Don't worry too much about this; they're not doing any harm,
+    and they'll be removed during posting anyway.
+
+
+    --> 348 lines in this file are short. Not reporting short lines.
+    --> 84 lines in this file are long. Not reporting long lines.
+    --> 8 lines in this file are VERY long!
+
+    If there are a lot of long or short lines, Gutcheck won't list
+    them individually. The short lines version of this message
+    is commonly seen when gutchecking poetry and some plays, where
+    the normal line length is shorter than the standard for prose.
+    A "VERY long" line is one over 80 characters.  You normally
+    shouldn't have any of these, but sometimes you may have to render
+    a table that must be that long, or some special preformatted
+    quotation that can't be broken.
+
+
+    --> There are 75 spaced dashes and em-dashes in this file. Not reporting them.
+
+    The PG standard for an emdash--like these--is two minus signs
+    with no spaces before or after them. However, some older texts
+    used spaced dashes - like these -- and if there are very many
+    such spaced dashes in the file, gutcheck just draws your
+    attention to it and doesn't list them individually.
+
+
+
+    Line 3020 - Non-ASCII character 233
+
+    Standard PG texts should use only ASCII characters with values
+    up to 127; however, non-English, accented characters can be 
+    represented according to several different non-ASCII encoding 
+    schemes, using values over 127. If you have a plain English text
+    with a few accented characters in words like cafe or tete-a-tete,
+    you should replace the accented characters with their unaccented 
+    versions. The English pound sign is another commonly-seen
+    non-ASCII character. If you have enough non-ASCII characters in
+    your text that you feel removing them would degrade your text
+    unacceptably, you should probably consider doing an 8-bit text
+    as well as a plain-ASCII version.
+
+
+
+    Line 1207 - Non-ISO-8859 character 156
+
+    Even in "8-bit" texts, there are distinctions between code sets.
+    The ISO-8859 family of 8-bit code sets is the most commonly used
+    in PG, and these sets do not define values in the range 128 through
+    159 as printable characters. It's quite common for someone on a
+    Windows or Mac machine to use a non-ISO character inadvertently,
+    so this message warns that the character is not only not ASCII,
+    but also outside the ISO-8859 range.
+
+
+
+    Line 46 - Tab character?
+
+    Some editors and WPs will put in Tab characters (character 9) to
+    indicate indented text. You should not use these in a PG text,
+    because you can't be sure how they will appear on a reader's
+    screen. Find the Tab, and replace it with the appropriate number
+    of spaces.
+
+
+    Line 1327 - Tilde character?
+
+    The tilde character (~) might be legitimately used, but it's the
+    character commonly used by OCR software to indicate a place where
+    it couldn't make out the letter, so gutcheck flags it.
+
+
+
+    Line 1347 - Asterisk?
+
+    Asterisks are reported only in paranoid mode (see -x). 
+    Like tildes, they are often used to indicate errors, but they are
+    also legitimately used as line delimiters and footnote markers.
+
+
+
+    Line 1451 - Long line 129
+
+    PG texts should have lines shorter than 76 characters. There may be
+    occasions where you decide that you really have to go out to 79
+    characters, but the sample above says that line 1451 is 129 characters
+    long - probably two lines run together.
+
+
+
+    Line 1590 - Short line?
+
+    PG texts should have lines longer than 54 characters. However,
+    there are special cases like poetry and tables of contents where
+    the lines _should_ be shorter. So treat Gutcheck warnings about
+    short lines carefully. Sometimes it's a genuine formatting
+    problem; sometimes the line really needs to be short.
+
+    Hint: gutcheck will not flag lines as short if they are indented
+    - if they start with a space. I like to start inserted stanzas
+    and other such items indented with a couple of spaces so that 
+    they stand out from the main text anyway.
+
+
+
+    Line 1804 - Begins with punctuation?
+
+    Lines should normally not begin with commas, periods and so on.
+    An exception is ellipses . . . which can happen at start of line.
+
+
+
+    Line 1850 - Spaced em-dash?
+
+    The PG standard for an em-dash--like these--is two minus signs
+    with no spaces before or after them. Gutcheck flags non-PG
+    em-dashes - like this one. Normally, you will replace it with a 
+    PG-standard em-dash.
+
+
+
+    Line 1904 - Query he/be error?
+
+    Gutcheck makes a very minor effort to look for that scourge of all
+    proofreaders, "be" replacing "he" or vice-versa, and draws your
+    attention to it when it thinks it has found one.
+
+
+
+    Line 2017 - Query digit in a1most
+
+    The digit 1 is commonly OCRed for the letter l, the digit 0 for
+    the letter O, and so on. When gutcheck sees a mix of digits and
+    letters, it warns you. It may generate a false positive for
+    something like 7am.
+
+
+
+    Line 2083 - Query standalone 0
+
+    In paranoid mode (see -x) only, gutcheck warns about the digit 0 
+    and the number 1 standing alone as a word. This can happen if the 
+    OCR misreads the words O or I.
+
+
+
+    Line 2115 - Query word whetber
+
+    If you have switched typo-checking on, gutcheck looks for
+    potential typos, especially common h/b errors. It's not
+    infallible; it sometimes queries legit words, but it's
+    always worth taking a look.
+
+
+
+    Line 2190 column 14 - Missing space?
+
+    Omitting a space is a very common error,especially coming from
+    OCRed text,and can be hard for a human to spot. The commas in
+    the previous sentence illustrate the kind of thing I mean.
+
+
+
+    Line 2240 column 48 - Spaced punctuation?
+
+    The flip side of the "missing space" error , here , is when extra
+    spaces are added before punctuation . Some old texts appear to add
+    extra spaces around punctuation consistently, but this was a
+    typographical convention rather than the author's intent, and the
+    extra "spaces" should be removed when preparing a PG text.
+
+
+
+    Line 2301 column 19 - Unspaced quotes?
+
+    Another common spacing problem occurs in a phrase like "You wait
+    there,"he said.
+
+
+
+    Line 2385 column 27 - Wrongspaced quotes?
+
+    As of version 0.98, gutcheck adds extra checks on whether a quote
+    seems to be a start or end quote, and queries those that appear to
+    be misplaced. This does give rise to false positives when quotes are
+    nested, for example:
+
+    "And how," she asked, "will your "friends" help you now?"
+
+    but these false positives are worth it because of the many cases
+    that this test catches, notably those like:
+
+    "And how, "she said," will your friends help you now?"
+
+    Sometimes a "wrongspaced quotes" query will arise because an earlier
+    quote in the paragraph was omitted, so if the place specified seems
+    to be OK, look back to see whether there's a problem in the preceding
+    lines.
+
+
+
+    Line 2400 - HTML Tag? <PRE>
+
+    Some PG texts have been converted from HTML, and not all of the
+    HTML tags have been removed.
+
+
+
+    Line 2402 - HTML symbol? &emdash;
+
+    Similarly, special HTML symbol characters can survive into PG
+    texts. This can occasionally produce amusing false positives like
+    . . . Marwick & Co were well known for it;
+
+
+
+    Line 2540 - Mismatched quotes
+
+    Another gutcheck mainstay - unclosed doublequotes in a paragraph.
+    See the discussion of quotes in the switches section near the
+    start of this file.
+    
+    Since the mismatch doesn't occur on any one line, gutcheck quotes
+    the line number of the first blank line following the paragraph,
+    since this is the point where it reconciles the count of quotes.
+    However, if gutcheck is echoing lines, that is, you haven't used
+    the -e switch, it will show the _first_ line of the paragraph, 
+    to help you find the place without using line numbers. The 
+    offending paragraph is therefore between the quoted line and 
+    the line number given.
+
+
+
+    Line 2587 - Mismatched single quotes
+
+    Only checked with the -s switch, since checking single quotes is 
+    not a very reliable process. Otherwise, the same logic as for 
+    doublequotes applies.
+
+
+
+    Line 2877 - Mismatched round brackets?
+
+    Also curly and square brackets. Texts with a lot of brackets, like
+    plays with bracketed stage instructions, may have mismatches.
+
+
+    Line 3150 - No CR?
+    Line 3204 - Two successive CRs?
+    Line 3281 position 75 - CR without LF?
+
+    These are the invalid line-end warnings. See the discussion of
+    line-end checking in the switches section near the start of this
+    file. If you see these, and your editor doesn't show anything
+    wrong, you should probably try deleting the characters just before
+    and after the line end, and the line-end itself, then retyping the
+    characters and the line-end.
+
+
+    Line 2940 - Paragraph starts with lower-case
+
+    A common error in an e-text is for an extra blank line
+
+    to be put in, like the blank line above, and this often
+    shows up as a new paragraph beginning with lower case.
+    Sometimes the blank line is deliberate, as when a 
+    quotation is inserted in a speech. Use your judgement.
+
+
+    Line 2987 - Extra period?
+
+    An extra period. is a. common problem in OCRed text. and usually
+    arises when a speck of dust on the page is mistaken for a period.
+    or. as occasionally happens. when a comma loses its tail.
+
+
+    Line 3012 column 12 - Double punctuation?
+
+    Double punctuation., like that,, is a common typo and
+    scanno. Some books have much legit double punctuation,
+    like etc., etc., but it's worth checking anyway.
+
+
+
+            *       *       *        *
+
+For Windows-only users who are unfamiliar with DOS:
+
+    If you're a Windows-only user, you need to save
+    gutcheck.exe into the folder (directory) where the
+    text file you want to check is. Let's say your
+    text file is in C:\GUT, then you should save
+    GUTCHECK.EXE into C:\GUT.
+
+    Now get to a DOS prompt. You can do this by
+    selecting the "Command Prompt" or "MS-DOS Prompt"
+    option that will be somewhere on your
+    Start/Programs menu.
+
+    Now get into the C:\GUT directory. 
+    You can do this using the CD (change directory) 
+    command, like this:
+        CD \GUT
+    and your prompt will change to 
+        C:\GUT>
+    so you know you're in the right place.
+
+    Now type
+        gutcheck yourfile.txt
+    and you'll see gutcheck's report.
+
+    By default, gutcheck prints its queries to screen.
+    If you want to create a file of them, to edit
+    against the text, you can use the greater-than
+    sign (>) to tell it to output the report to a
+    file. For example, if you want its report in a
+    file called QUERIES.LST, you could type
+    
+        gutcheck yourfile.txt > queries.lst
+
+    The queries.lst file will then contain the listing
+    of possible formatting errors, and you can
+    edit it alongside your text.
+
+    Whatever you do, DON'T make the filename after
+    the greater-than sign the name of a file already
+    on your disk that you want to keep, because
+    the greater-than sign will cause gutcheck to
+    replace any existing file of that name.
+
+    So, for example, if you have two Tolstoy files
+    that you want to check, called WARPEACE.TXT and 
+    ANNAK.TXT, make sure that neither of these names
+    is ever used following the greater-than sign.
+    To check these correctly, you might do:
+
+    gutcheck warpeace.txt >war.lst
+
+    and
+
+    gutcheck annak.txt > annak.lst
+
+    separately. Then you can look at war.lst and annak.lst
+    to see the gutcheck reports.
+
+            *       *       *        *
+
+
+For existing 0.98 users upgrading to 0.99:
+
+    If you run on old 16-bit DOS or Windows 3.x, I'm afraid
+    you're out of luck. I'm not saying it _can't_ be compiled
+    to run on 16-bit, but the executable with the package is
+    for Win32 only. *nix users won't notice the change at all.
+
+
+    There are two new switches: -u and -d. 
+          See above for full rundown.
+
+
+Here's a list of the new errors:
+
+    Line 1456 - Carat character?
+
+    I^ve found a few.
+
+
+    Line 1821 - Forward slash?
+
+    Common error for italicized "I", or so /'ve found.
+
+
+    Line 2139 - Query missing paragraph break?
+
+    "Come here, son." "Do I _have_ to go, dad?"
+    Like that. False positives in some texts. Sorry 'bout that,
+    but these are often errors.
+
+
+    Line 2200 - Query had/bad error?
+
+    Clear enough. Doesn't catch as many as I'd like it to,
+    but rarely gives false alarms.
+
+
+    Line 2268 - Query punctuation after the?
+
+    Some words, like "the", very rarely have punctuation
+    following them. Others, like "Mrs", usually have a
+    period, but never a comma. Occasional false positives.
+
+
+    Line 2380 - Query possible scanno arid
+
+    It found one of your user-defined typos when you
+    used the -u switch.
+
+
+    Line 2511 - Capital "S"?
+
+    Surprisingly common specific case, like: Jane'S 
+
+    
+    Line 3469 - endquote missing punctuation?
+
+    OK. This one can really cause a lot of false positives
+    in some books, but it switches itself off if it finds
+    more than 20 in a text, unless you force it to list them
+    all with the -v switch.
+    "Hey, dad" Johnny said, "can we go now?"
+    is a common punctuation-missing error.
+
+
+    Line 4266 - Mismatched underscores?
+
+    Like mismatched anything else!
+
+
diff -r 000000000000 -r c2f4c0285180 gclib/Makefile.am
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,10 @@
+INCLUDES=-I$(top_srcdir)
+AM_CFLAGS=$(GLIB_CFLAGS)
+LIBS=$(GLIB_LIBS)
+
+noinst_LTLIBRARIES=libgc.la
+libgc_la_SOURCES=gclib.h textfileutils.c textfileutils.h spawn.c spawn.h
+if !HAVE_GLIB
+libgc_la_SOURCES+=macros.h types.h fileutils.c fileutils.h mem.c mem.h \
+  strfuncs.c strfuncs.h gcstring.c gcstring.h utils.c utils.h
+endif
diff -r 000000000000 -r c2f4c0285180 gclib/fileutils.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/fileutils.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,46 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <gclib/macros.h>
+#include <gclib/mem.h>
+#include <gclib/fileutils.h>
+#include <gclib/gcstring.h>
+
+/*
+ * Read a file into memory (which should be freed with mem_free when no
+ * longer required). Returns FALSE on error and outputs a suitable error
+ * message to stderr; *contents and *length are then left untouched.
+ */
+boolean file_get_contents(const char *filename,char **contents,size_t *length)
+{
+    FILE *fp;
+    size_t n;
+    char *buffer;
+    String *string;
+    fp=fopen(filename,"rb");
+    if (!fp)
+    {
+	perror(filename);
+	return FALSE;
+    }
+    buffer=mem_new(char,1024);
+    string=string_new(NULL);
+    do
+    {
+	n=fread(buffer,1,1024,fp);
+	if (ferror(fp))	/* n is unsigned, so only ferror can reveal an error */
+	{
+	    perror(filename);
+	    string_free(string,TRUE);
+	    mem_free(buffer);
+	    fclose(fp);	/* a stream is released with fclose, never free */
+	    return FALSE;
+	}
+	string_append_len(string,buffer,n);
+    } while(n);
+    mem_free(buffer);
+    if (length)
+	*length=string->len;
+    *contents=string_free(string,FALSE);
+    fclose(fp);
+    return TRUE;
+}
diff -r 000000000000 -r c2f4c0285180 gclib/fileutils.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/fileutils.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,8 @@
+#ifndef GC_FILEUTILS_H
+#define GC_FILEUTILS_H
+
+#include <gclib/types.h>
+
+boolean file_get_contents(const char *filename,char **contents,size_t *length);
+
+#endif /* GC_FILEUTILS_H */
diff -r 000000000000 -r c2f4c0285180 gclib/gclib.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/gclib.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,36 @@
+#if HAVE_GLIB
+
+#include <glib.h>
+#define GC_DIR_SEPARATOR G_DIR_SEPARATOR
+#define GC_DIR_SEPARATOR_S G_DIR_SEPARATOR_S
+#define GC_IS_DIR_SEPARATOR(c) G_IS_DIR_SEPARATOR(c)
+#define boolean gboolean
+#define String GString
+#define mem_new0 g_new0
+#define mem_free g_free
+#define str_dup g_strdup
+#define str_ndup g_strndup
+#define path_get_basename g_path_get_basename
+#define file_get_contents(filename,contents,length) \
+  g_file_get_contents(filename,contents,length,NULL)
+#define string_new g_string_new
+#define string_append g_string_append
+#define string_append_len g_string_append_len
+#define string_append_c g_string_append_c
+#define string_free g_string_free
+#define string_set_size g_string_set_size
+
+#else	/* !HAVE_GLIB */
+
+#include <gclib/macros.h>
+#include <gclib/types.h>
+#include <gclib/mem.h>
+#include <gclib/fileutils.h>
+#include <gclib/strfuncs.h>
+#include <gclib/gcstring.h>
+#include <gclib/utils.h>
+
+#endif	/* HAVE_GLIB */
+
+#include <gclib/textfileutils.h>
+#include <gclib/spawn.h>
diff -r 000000000000 -r c2f4c0285180 gclib/gcstring.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/gcstring.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,90 @@
+#include <stdlib.h>
+#include <string.h>
+#include <gclib/gcstring.h>
+#include <gclib/types.h>
+#include <gclib/mem.h>
+#include <gclib/strfuncs.h>
+
+/*
+ * Strings which manage their own memory
+ */
+
+String *string_new(const char *init)
+{
+    const char *text=init?init:"";
+    String *result=mem_new(String,1);
+    result->str=str_dup(text);
+    result->len=strlen(text);
+    /* alloc counts the terminating nul as well as the len bytes */
+    result->alloc=result->len+1;
+    return result;
+}
+
+/*
+ * Dispose of a String, which must not be used afterwards. If free_segment
+ * is TRUE the character data is released as well and NULL is returned; if
+ * it is FALSE the character data is returned instead, and the caller
+ * becomes responsible for releasing it with mem_free.
+ */
+char *string_free(String *string,boolean free_segment)
+{
+    char *segment=string->str;
+    /* the wrapper itself is always released */
+    mem_free(string);
+    if (!free_segment)
+	/* ownership of the bytes passes to the caller */
+	return segment;
+    mem_free(segment);
+    return NULL;
+}
+
+/* Append the single byte c to string, keeping it nul-terminated. */
+void string_append_c(String *string,char c)
+{
+    size_t end=string->len;
+    if (end+1==string->alloc)
+    {
+	string->alloc*=2;
+	string->str=mem_renew(char,string->str,string->alloc);
+    }
+    string->str[end]=c;
+    string->str[++end]='\0';
+    string->len=end;
+}
+
+/*
+ * Append len bytes from s to string; if len<0, s is nul-terminated.
+ */
+void string_append_len(String *string,const char *s,ssize_t len)
+{
+    size_t count=len<0?strlen(s):(size_t)len;
+    size_t needed=string->len+count;
+    if (needed>=string->alloc)
+    {
+	do
+	    string->alloc*=2;
+	while (needed>=string->alloc);
+	string->str=mem_renew(char,string->str,string->alloc);
+    }
+    memcpy(string->str+string->len,s,count);
+    string->len=needed;
+    string->str[needed]='\0';
+}
+
+/*
+ * Set the length of a String to len. Shrinking truncates it; growing leaves
+ * the added bytes undefined. string->str[string->len] is always a nul byte.
+ */
+void string_set_size(String *string,size_t len)
+{
+    size_t wanted=string->alloc;
+    while (len>=wanted)
+	wanted*=2;
+    if (wanted!=string->alloc)
+    {
+	string->alloc=wanted;
+	string->str=mem_renew(char,string->str,wanted);
+    }
+    string->str[len]='\0';
+    string->len=len;
+}
diff -r 000000000000 -r c2f4c0285180 gclib/gcstring.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/gcstring.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,20 @@
+#ifndef GC_STRING_H
+#define GC_STRING_H
+
+#include <unistd.h>
+#include <gclib/types.h>
+
+/* A growable byte string; str is always nul-terminated. */
+typedef struct {
+    char *str;
+    size_t alloc,len;	/* bytes allocated for str; bytes used, excluding nul */
+} String;
+
+String *string_new(const char *init);
+char *string_free(String *string,boolean free_segment);
+void string_append_c(String *string,char c);
+void string_append_len(String *string,const char *s,ssize_t len);
+void string_set_size(String *string,size_t len);
+#define string_append(string,s)		string_append_len(string,s,-1)
+
+#endif /* GC_STRING_H */
diff -r 000000000000 -r c2f4c0285180 gclib/macros.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/macros.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,7 @@
+#ifndef FALSE
+#define FALSE	0
+#endif
+
+#ifndef TRUE
+#define TRUE	(!FALSE)
+#endif
diff -r 000000000000 -r c2f4c0285180 gclib/mem.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/mem.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,54 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <gclib/mem.h>
+
+/*
+ * A memory allocator that aborts on failure (so that the caller never
+ * needs to handle out of memory, which we assume is very unlikely to
+ * happen on a modern machine). Overflow of nmemb*size counts as failure.
+ */
+void *mem_alloc(size_t nmemb,size_t size)
+{
+    void *ptr=(size && nmemb>(size_t)-1/size)?NULL:malloc(nmemb*size);
+    if (!ptr)
+    {
+	fprintf(stderr,
+	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
+	  (unsigned long)nmemb,(unsigned long)size);
+	abort();
+    }
+    return ptr;
+}
+
+/*
+ * As mem_alloc, but the new memory is cleared to zero.
+ */
+void *mem_alloc0(size_t nmemb,size_t size)
+{
+    void *block=calloc(nmemb,size);
+    if (block)
+	return block;
+    /* allocation failed: say what was being asked for, then give up */
+    fprintf(stderr,
+      "Not enough memory to allocate %lu elements of %lu bytes.\n",
+      (unsigned long)nmemb,(unsigned long)size);
+    abort();
+    return NULL;	/* not reached */
+}
+
+/*
+ * Grow or shrink a memory block, aborting on failure or size overflow.
+ */
+void *mem_realloc(void *ptr,size_t nmemb,size_t size)
+{
+    ptr=(size && nmemb>(size_t)-1/size)?NULL:realloc(ptr,nmemb*size);
+    if (!ptr)
+    {
+	fprintf(stderr,
+	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
+	  (unsigned long)nmemb,(unsigned long)size);
+	abort();
+    }
+    return ptr;
+}
diff -r 000000000000 -r c2f4c0285180 gclib/mem.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/mem.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,13 @@
+#ifndef GC_MEM_H
+#define GC_MEM_H
+
+void *mem_alloc(size_t nmemb,size_t size);
+void *mem_alloc0(size_t nmemb,size_t size);
+void *mem_realloc(void *ptr,size_t nmemb,size_t size);
+
+#define mem_new(type,n)		((type *)mem_alloc(n,sizeof(type)))
+#define mem_new0(type,n)	((type *)mem_alloc0(n,sizeof(type)))
+#define mem_renew(type,ptr,n)	((type *)mem_realloc(ptr,n,sizeof(type)))
+#define mem_free(ptr)		free(ptr)
+
+#endif /* GC_MEM_H */
diff -r 000000000000 -r c2f4c0285180 gclib/spawn.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/spawn.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,98 @@
+#include <stdlib.h>
+#include <stdio.h>
+#ifndef WIN32
+#include <sys/wait.h>
+#endif
+#include <gclib/gclib.h>
+
+#define SPAWN_BUFSIZE	128
+
+/*
+ * Run the command described by the NULL-terminated vector argv and wait
+ * for it to finish. If standard_output is non-NULL, it is set to the
+ * command's output (to be freed with mem_free); otherwise the output is
+ * discarded. If exit_status is non-NULL, it is set to the command's exit
+ * status. Returns FALSE on failure, after writing a message to stderr.
+ */
+boolean spawn_sync(char **argv,char **standard_output,int *exit_status)
+{
+/* Don't use g_spawn_sync on WIN32 for now to avoid needing the helper */
+#if HAVE_GLIB && !defined(WIN32)
+    char *standard_error=NULL;
+    GError *error=NULL;
+    gboolean retval;
+    GSpawnFlags flags=G_SPAWN_SEARCH_PATH;
+    if (!standard_output)
+	/* add to, rather than replace, the path-search flag */
+	flags|=G_SPAWN_STDOUT_TO_DEV_NULL;
+    retval=g_spawn_sync(NULL,argv,NULL,flags,NULL,NULL,standard_output,
+      &standard_error,exit_status,&error);
+    /* nothing is captured if the spawn itself failed */
+    if (standard_error)
+	fputs(standard_error,stderr);
+    g_free(standard_error);
+    if (!retval)
+    {
+	fprintf(stderr,"%s\n",error->message);
+	g_error_free(error);
+    }
+    else if (exit_status)
+	*exit_status=WEXITSTATUS(*exit_status);
+    return retval;
+#else
+    FILE *fp;
+    int i,r;
+    size_t n,len;
+    String *command_line,*string;
+    command_line=string_new(NULL);
+    for(i=0;argv[i];i++)
+    {
+	if (i)
+	    string_append_c(command_line,' ');
+	string_append(command_line,argv[i]);
+    }
+    fp=popen(command_line->str,"r");
+    if (!fp)
+    {
+	/* report before freeing: the message uses the command line */
+	perror(command_line->str);
+	string_free(command_line,TRUE);
+	return FALSE;
+    }
+    string_free(command_line,TRUE);
+    string=string_new(NULL);
+    do
+    {
+	len=string->len;
+	string_set_size(string,len+SPAWN_BUFSIZE);
+	n=fread(string->str+len,1,SPAWN_BUFSIZE,fp);
+	/* n is unsigned, so a read error can only be seen via ferror */
+	if (ferror(fp))
+	{
+	    perror("fread");
+	    (void)pclose(fp);
+	    string_free(string,TRUE);
+	    return FALSE;
+	}
+	string_set_size(string,len+n);
+    } while(n);
+    r=pclose(fp);
+    if (r<0)
+    {
+	perror("pclose");
+	string_free(string,TRUE);
+	return FALSE;
+    }
+#ifndef WIN32
+    /* pclose gives a wait status; match the g_spawn_sync branch above */
+    r=WEXITSTATUS(r);
+#endif
+    if (exit_status)
+	*exit_status=r;
+    if (standard_output)
+	*standard_output=string_free(string,FALSE);
+    else
+	string_free(string,TRUE);
+    return TRUE;
+#endif
+}
diff -r 000000000000 -r c2f4c0285180 gclib/spawn.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/spawn.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,8 @@
+#ifndef GC_SPAWN_H
+#define GC_SPAWN_H
+
+#include <gclib/gclib.h>
+
+boolean spawn_sync(char **argv,char **standard_output,int *exit_status);
+
+#endif /* GC_SPAWN_H */
diff -r 000000000000 -r c2f4c0285180 gclib/strfuncs.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/strfuncs.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,26 @@
+#include <stdlib.h>
+#include <string.h>
+#include <gclib/mem.h>
+#include <gclib/strfuncs.h>
+
+/*
+ * Like strndup (copies at most n bytes), but NULL only if str is NULL.
+ */
+char *str_ndup(const char *str,size_t n)
+{
+    char *copy=NULL;
+    if (str)
+    {
+	/* zero-filled and n+1 bytes long, so always nul-terminated */
+	copy=mem_alloc0(n+1,1);
+	strncpy(copy,str,n);
+    }
+    return copy;
+}
+
+/*
+ * Like strdup, but returns NULL (rather than crashing) if str is NULL.
+ */
+char *str_dup(const char *str)
+{
+    return str?str_ndup(str,strlen(str)):NULL;
+}
diff -r 000000000000 -r c2f4c0285180 gclib/strfuncs.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/strfuncs.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,7 @@
+#ifndef GC_STRFUNCS_H
+#define GC_STRFUNCS_H
+
+char *str_dup(const char *str);
+char *str_ndup(const char *str,size_t n);
+
+#endif /* GC_STRFUNCS_H */
diff -r 000000000000 -r c2f4c0285180 gclib/textfileutils.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/textfileutils.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,33 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <gclib/gclib.h>
+
+/*
+ * Read a file into memory (which should be freed with mem_free when no
+ * longer required). Returns FALSE on error and outputs a suitable error
+ * message to stderr.
+ * DOS-style line endings are handled transparently even on platforms which
+ * don't normally use this format (every CR byte is dropped).
+ */
+boolean file_get_contents_text(const char *filename,char **contents,
+  size_t *length)
+{
+    size_t i;	/* same type as raw_length, so large files index correctly */
+    char *raw;
+    size_t raw_length;
+    String *string;
+    if (!file_get_contents(filename,&raw,&raw_length))
+	return FALSE;
+    string=string_new(NULL);
+    for(i=0;i<raw_length;i++)
+	if (raw[i]!='\r')
+	    string_append_c(string,raw[i]);
+    mem_free(raw);
+    if (length)
+	*length=string->len;
+    if (contents)
+	*contents=string_free(string,FALSE);
+    else
+	string_free(string,TRUE);
+    return TRUE;
+}
diff -r 000000000000 -r c2f4c0285180 gclib/textfileutils.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/textfileutils.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,9 @@
+#ifndef GC_TEXTFILEUTILS_H
+#define GC_TEXTFILEUTILS_H
+
+#include <gclib/gclib.h>
+
+boolean file_get_contents_text(const char *filename,char **contents,
+  size_t *length);
+
+#endif /* GC_TEXTFILEUTILS_H */
diff -r 000000000000 -r c2f4c0285180 gclib/types.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/types.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,6 @@
+#ifndef GC_TYPES_H
+#define GC_TYPES_H
+
+typedef int boolean;
+
+#endif	/* GC_TYPES_H */
diff -r 000000000000 -r c2f4c0285180 gclib/utils.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/utils.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,46 @@
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <gclib/mem.h>
+#include <gclib/strfuncs.h>
+#include <gclib/utils.h>
+
+#define is_valid_drive(d)	((d)>='a' && (d)<='z' || (d)>='A' && (d)<='Z')
+
+/*
+ * Gets the last component of the filename. If filename ends with a directory
+ * separator it gets the component before the last slash. If filename consists
+ * only of directory separators (and on Windows, possibly a drive letter), a
+ * single separator is returned. If filename is empty, it gets ".".
+ */
+char *path_get_basename(const char *filename)
+{
+    ssize_t start,end;
+    size_t len;
+    if (!*filename)
+	return str_dup(".");
+    /* end: index of the last byte that is not a directory separator */
+    for(end=strlen(filename)-1;end>=0;end--)
+	if (!GC_IS_DIR_SEPARATOR(filename[end]))
+	    break;
+    if (end<0)
+	/* nothing but separators */
+	return str_dup(GC_DIR_SEPARATOR_S);
+#ifdef WIN32
+    if (end==1 && is_valid_drive(filename[0]) && filename[1]==':')
+	/* a drive specification followed only by separators */
+	return str_dup(GC_DIR_SEPARATOR_S);
+#endif
+    /* start: index of the separator before the last component, or -1 */
+    for(start=end;start>=0;start--)
+	if (GC_IS_DIR_SEPARATOR(filename[start]))
+	    break;
+#ifdef WIN32
+    /* a leading "X:" drive prefix is not part of the base name */
+    if (start<0 && is_valid_drive(filename[0]) && filename[1]==':')
+	start=1;
+#endif
+    /* copy the bytes strictly after start, up to and including end */
+    len=end-start;
+    return str_ndup(filename+start+1,len);
+}
diff -r 000000000000 -r c2f4c0285180 gclib/utils.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gclib/utils.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,16 @@
+#ifndef GC_UTIL_H
+#define GC_UTIL_H
+
+#ifdef WIN32
+#define GC_DIR_SEPARATOR '\\'
+#define GC_DIR_SEPARATOR_S "\\"
+#define GC_IS_DIR_SEPARATOR(c) ((c)==GC_DIR_SEPARATOR || (c)=='/')
+#else
+#define GC_DIR_SEPARATOR '/'
+#define GC_DIR_SEPARATOR_S "/"
+#define GC_IS_DIR_SEPARATOR(c) ((c)==GC_DIR_SEPARATOR)
+#endif
+
+char *path_get_basename(const char *filename);
+
+#endif /* GC_UTIL_H */
diff -r 000000000000 -r c2f4c0285180 gutcheck/Makefile.am
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gutcheck/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,8 @@
+bin_PROGRAMS=gutcheck
+pkgdata_DATA=gutcheck.typ
+
+gutcheck.typ:	gutcheck.typ.in
+	sed 's/$$/\r/' $< > $@
+
+EXTRA_DIST=gutcheck.typ.in
+CLEANFILES=gutcheck.typ
diff -r 000000000000 -r c2f4c0285180 gutcheck/gutcheck.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gutcheck/gutcheck.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,2982 @@
+/*************************************************************************/
+/* gutcheck - check for assorted weirdnesses in a PG candidate text file */
+/*                                                                       */
+/* Version 0.991                                                         */
+/* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>                  */
+/*                                                                       */
+/* This program is free software; you can redistribute it and/or modify  */
+/* it under the terms of the GNU General Public License as published by  */
+/* the Free Software Foundation; either version 2 of the License, or     */
+/* (at your option) any later version.                                   */
+/*                                                                       */
+/* This program is distributed in the hope that it will be useful,       */
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of        */
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         */
+/* GNU General Public License for more details.                          */
+/*                                                                       */
+/* You should have received a copy of the GNU General Public License     */
+/* along with this program; if not, write to the                         */
+/*      Free Software Foundation, Inc.,                                  */
+/*      59 Temple Place,                                                 */
+/*      Suite 330,                                                       */
+/*      Boston, MA  02111-1307  USA                                      */
+/*                                                                       */
+/*                                                                       */
+/*                                                                       */
+/* Overview comments:                                                    */
+/*                                                                       */
+/* If you're reading this, you're either interested in how to detect     */
+/* formatting errors, or very very bored.                                */
+/*                                                                       */
+/* Gutcheck is a homebrew formatting checker specifically for            */
+/* spotting common formatting problems in a PG e-text. I typically       */
+/* run it once or twice on a file I'm about to submit; it usually        */
+/* finds a few formatting problems. It also usually finds lots of        */
+/* queries that aren't problems at all; it _really_ doesn't like         */
+/* the standard PG header, for example.  It's optimized for straight     */
+/* prose; poetry and non-fiction involving tables tend to trigger        */
+/* false alarms.                                                         */
+/*                                                                       */
+/* The code of gutcheck is not very interesting, but the experience      */
+/* of what constitutes a possible error may be, and the best way to      */
+/* illustrate that is by example.                                        */
+/*                                                                       */
+/*                                                                       */
+/* Here are some common typos found in PG texts that gutcheck            */
+/* will flag as errors:                                                  */
+/*                                                                       */
+/* "Look!John , over there!"                                             */
+/* <this is a HTML tag>                                                  */
+/* &so is this;                                                          */
+/* Margaret said: " Now you should start for school."                    */
+/* Margaret said: "Now you should start for school. (if end of para)     */
+/* The horse is said to he worth a lot.                                  */
+/* 0K - this'11 make you look close1y.                                   */
+/* "If you do. you'll regret it!"                                        */
+/*                                                                       */
+/* There are some complications . The extra space left around that       */
+/* period was an error . . . but that ellipsis wasn't.                   */
+/*                                                                       */
+/* The last line of a paragraph                                          */
+/* is usually short.                                                     */
+/*                                                                       */
+/* This period is an error.But the periods in a.m. aren't.               */
+/*                                                                       */
+/* Checks that are do-able but not (well) implemented are:               */
+/*        Single-quote checking.                                         */
+/*          Despite 3 attempts at it, singlequote checking is still      */
+/*          crap in gutcheck. It may not be possible without analysis    */
+/*          of the whole paragraph.                                      */
+/*                                                                       */
+/*************************************************************************/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#define MAXWORDLEN    80    /* max length of one word             */
+#define LINEBUFSIZE 2048    /* buffer size for an input line      */
+
+#define MAX_USER_TYPOS 1000             /* capacity of usertypo[]            */
+#define USERTYPO_FILE "gutcheck.typ"    /* file the user typos are read from */
+                 /* Size of path buffers, unless the platform already defines it. */
+#ifndef MAX_PATH
+#define MAX_PATH 16384
+#endif
+                 /* Line buffers; main() also reads the user typo file via aline. */
+char aline[LINEBUFSIZE];
+char prevline[LINEBUFSIZE];
+
+                 /* Common typos and scannos, all lower case; the list ends with "". */
+char *typo[] = { "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane", "nad",
+                "te", "ig", "acn",  "ahve", "alot", "anbd", "andt", "awya", "aywa", "bakc", "om",
+                "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt", "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr",
+                "hmi", "hse", "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd", "gerat", "goign",
+                "gruop", "haev", "hda", "hearign", "seeign", "sayign", "herat", "hge", "hsa", "hsi", "hte", "htere",
+                "htese", "htey", "htis", "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut", "loev",
+                "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter", "omre", "onyl", "otehr", "otu", "owrk",
+                "owuld", "peice", "peices", "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",
+                "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe", "sohw", "stnad", "stopry",
+                "stoyr", "stpo", "tahn", "taht", "tath", "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge",
+                "thier", "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne", "tirne", "tkae",
+                "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey", "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih",
+                "whihc", "whta", "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",
+                "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking", "wtih", "wuould", "wya", "yera",
+                "yeras", "yersa", "yoiu", "youve", "ytou", "yuor",
+                /* added h/b words for version 12 - removed a few with "tbe" v.25 */
+                "abead", "ahle", "ahout", "ahove", "altbough", "balf", "bardly", "bas", "bave", "baving", "bebind", 
+                "beld", "belp", "belped", "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge", "dehates", 
+                "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan", "hegin", "heing", 
+                "helieve", "henefit", "hetter", "hetween", "heyond", "hig", "higber", "huild", "huy", "hy", "jobn", "joh", 
+                "meanwbile", "memher", "memhers", "numher", "numhers", 
+                "perbaps", "prohlem", "puhlic", "witbout", 
+                /* and a few more for .18 */
+                "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud", "prornise", "prornised", "modem", "bo",
+                "heside", "chapteb", "chaptee", "se",
+                 ""};
+
+char *usertypo[MAX_USER_TYPOS];    /* user-defined typos; main() stores usertypo_count entries read from USERTYPO_FILE */
+
+                 /* Common abbreviations and other OK words not to query as typos. */
+                 /* 0.99 last-minute - removed "ms"      */
+char *okword[] = {"mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm", "rd", "sh", "br",
+                  "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "pompeii","hawaii","hawaiian",
+                  "hotbed", "heartbeat", "heartbeats", "outbid", "outbids", "frostbite", "frostbitten",
+                  ""};
+
+                 /* Common abbreviations that cause otherwise unexplained periods. */
+char *abbrev[] = {"cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op", "cit",
+                  "deg", "min", "chap", "oz", "mme", "mlle", "mssrs",
+                  ""};
+                 /* Two-Letter combinations that rarely if ever start words, */
+                 /* but are common scannos or otherwise common letter        */
+                 /* combinations.                                            */
+char *nostart[] = { "hr", "hl", "cb", "sb", "tb", "wb", "tl",
+                    "tn", "rn", "lt", "tj",
+                    "" };
+
+                 /* Two-Letter combinations that rarely if ever end words    */
+                 /* but are common scannos or otherwise common letter        */
+                 /* combinations                                             */
+char *noend[]   = { "cb", "gb", "pb", "sb", "tb", 
+                    "wh","fr","br","qu","tw","gl","fl","sw","gr","sl","cl",
+                    "iy",
+                    ""};
+                 /* HTML tag names; NOTE(review): presumably the tags accepted as markup (cf. the M switch) - confirm. */
+char *markup[]  = { "a", "b", "big", "blockquote", "body", "br", "center", 
+                    "col", "div", "em", "font", "h1", "h2", "h3", "h4", 
+                    "h5", "h6", "head", "hr", "html", "i", "img", "li", 
+                    "meta", "ol", "p", "pre", "small", "span", "strong", 
+                    "sub", "sup", "table", "td", "tfoot", "thead", "title", 
+                    "tr", "tt", "u", "ul", 
+                    ""};
+                 /* DP-specific markup strings (cf. the D switch, "ignore DP-specific markup"). */
+char *DPmarkup[] = { "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>",
+                    ""}; /* <tb> added .991 */
+                 /* NOTE(review): presumably words after which a comma is queried - confirm; "mrs" is listed twice. */
+char *nocomma[]  = { "the", "it's", "their", "an", "mrs", "a", "our", "that's",
+                     "its", "whose", "every", "i'll", "your", "my", 
+                     "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "rd", 
+                     "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", 
+                     "i'm", "during", "let", "toward", "among",
+                     ""};
+
+                 /* NOTE(review): presumably words after which a period is queried - confirm. */
+char *noperiod[] = { "every", "i'm", "during", "that's", "their", "your", "our", "my", "or", 
+                     "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether", 
+                     "i'll", "whose", "who", "because", "when", "let", "till", "very",
+                     "an", "among", "those", "into", "whom", "having", "thence",
+                     ""}; 
+
+
+char vowels[] = "aeiou�����������������������";  /* Carlo's old suggestion, updated .991 */
+/* HTML entities: named form, numeric form, plain-text stand-in; ends with empty strings. &pound; and &deg; appear twice. */
+struct {
+    char *htmlent;
+    char *htmlnum;
+    char *textent;
+    } entities[] = { "&amp;",           "&#38;",        "&", 
+                     "&lt;",            "&#60;",        "<",
+                     "&gt;",            "&#62;",        ">",
+                     "&deg;",           "&#176;",       " degrees",
+                     "&pound;",         "&#163;",       "L",
+                     "&quot;",          "&#34;",        "\"",   /* -- quotation mark = APL quote, */
+                     "&OElig;",         "&#338;",       "OE",  /* -- latin capital ligature OE, */
+                     "&oelig;",         "&#339;",       "oe",  /* -- latin small ligature oe, U+0153 ISOlat2 --> */
+                     "&Scaron;",        "&#352;",       "S",  /* -- latin capital letter S with caron, */
+                     "&scaron;",        "&#353;",       "s",  /* -- latin small letter s with caron, */
+                     "&Yuml;",          "&#376;",       "Y",  /* -- latin capital letter Y with diaeresis, */
+                     "&circ;",          "&#710;",       "",  /* -- modifier letter circumflex accent, */
+                     "&tilde;",         "&#732;",       "~",  /* -- small tilde, U+02DC ISOdia --> */
+                     "&ensp;",          "&#8194;",      " ", /* -- en space, U+2002 ISOpub --> */
+                     "&emsp;",          "&#8195;",      " ", /* -- em space, U+2003 ISOpub --> */
+                     "&thinsp;",        "&#8201;",      " ", /* -- thin space, U+2009 ISOpub --> */
+                     "&ndash;",         "&#8211;",      "-", /* -- en dash, U+2013 ISOpub --> */
+                     "&mdash;",         "&#8212;",      "--", /* -- em dash, U+2014 ISOpub --> */
+                     "&lsquo;",         "&#8216;",      "'", /* -- left single quotation mark, */
+                     "&rsquo;",         "&#8217;",      "'", /* -- right single quotation mark, */
+                     "&sbquo;",         "&#8218;",      "'", /* -- single low-9 quotation mark, U+201A NEW --> */
+                     "&ldquo;",         "&#8220;",      "\"", /* -- left double quotation mark, */
+                     "&rdquo;",         "&#8221;",      "\"", /* -- right double quotation mark, */
+                     "&bdquo;",         "&#8222;",      "\"", /* -- double low-9 quotation mark, U+201E NEW --> */
+                     "&lsaquo;",        "&#8249;",      "\"", /* -- single left-pointing angle quotation mark, */
+                     "&rsaquo;",        "&#8250;",      "\"", /* -- single right-pointing angle quotation mark, */
+                     "&nbsp;",          "&#160;",       " ", /* -- no-break space = non-breaking space, */
+                     "&iexcl;",         "&#161;",       "!", /* -- inverted exclamation mark, U+00A1 ISOnum --> */
+                     "&cent;",          "&#162;",       "c", /* -- cent sign, U+00A2 ISOnum --> */
+                     "&pound;",         "&#163;",       "L", /* -- pound sign, U+00A3 ISOnum --> */
+                     "&curren;",        "&#164;",       "$", /* -- currency sign, U+00A4 ISOnum --> */
+                     "&yen;",           "&#165;",       "Y", /* -- yen sign = yuan sign, U+00A5 ISOnum --> */
+                     "&sect;",          "&#167;",       "--", /* -- section sign, U+00A7 ISOnum --> */
+                     "&uml;",           "&#168;",       " ", /* -- diaeresis = spacing diaeresis, */
+                     "&copy;",          "&#169;",       "(C) ", /* -- copyright sign, U+00A9 ISOnum --> */
+                     "&ordf;",          "&#170;",       " ", /* -- feminine ordinal indicator, U+00AA ISOnum --> */
+                     "&laquo;",         "&#171;",       "\"", /* -- left-pointing double angle quotation mark */
+                     "&shy;",           "&#173;",       "-", /* -- soft hyphen = discretionary hyphen, */
+                     "&reg;",           "&#174;",       "(R) ", /* -- registered sign = registered trade mark sign, */
+                     "&macr;",          "&#175;",       " ", /* -- macron = spacing macron = overline */
+                     "&deg;",           "&#176;",       " degrees", /* -- degree sign, U+00B0 ISOnum --> */
+                     "&plusmn;",        "&#177;",       "+-", /* -- plus-minus sign = plus-or-minus sign, */
+                     "&sup2;",          "&#178;",       "2", /* -- superscript two = superscript digit two */
+                     "&sup3;",          "&#179;",       "3", /* -- superscript three = superscript digit three */
+                     "&acute;",         "&#180;",       " ", /* -- acute accent = spacing acute, */
+                     "&micro;",         "&#181;",       "m", /* -- micro sign, U+00B5 ISOnum --> */
+                     "&para;",          "&#182;",       "--", /* -- pilcrow sign = paragraph sign, */
+                     "&cedil;",         "&#184;",       " ", /* -- cedilla = spacing cedilla, U+00B8 ISOdia --> */
+                     "&sup1;",          "&#185;",       "1", /* -- superscript one = superscript digit one, */
+                     "&ordm;",          "&#186;",       " ", /* -- masculine ordinal indicator, */
+                     "&raquo;",         "&#187;",       "\"", /* -- right-pointing double angle quotation mark */
+                     "&frac14;",        "&#188;",       "1/4", /* -- vulgar fraction one quarter */
+                     "&frac12;",        "&#189;",       "1/2", /* -- vulgar fraction one half */
+                     "&frac34;",        "&#190;",       "3/4", /* -- vulgar fraction three quarters */
+                     "&iquest;",        "&#191;",       "?", /* -- inverted question mark */
+                     "&Agrave;",        "&#192;",       "A", /* -- latin capital letter A with grave */
+                     "&Aacute;",        "&#193;",       "A", /* -- latin capital letter A with acute, */
+                     "&Acirc;",         "&#194;",       "A", /* -- latin capital letter A with circumflex, */
+                     "&Atilde;",        "&#195;",       "A", /* -- latin capital letter A with tilde, */
+                     "&Auml;",          "&#196;",       "A", /* -- latin capital letter A with diaeresis, */
+                     "&Aring;",         "&#197;",       "A", /* -- latin capital letter A with ring above */
+                     "&AElig;",         "&#198;",       "AE", /* -- latin capital letter AE */
+                     "&Ccedil;",        "&#199;",       "C", /* -- latin capital letter C with cedilla, */
+                     "&Egrave;",        "&#200;",       "E", /* -- latin capital letter E with grave, */
+                     "&Eacute;",        "&#201;",       "E", /* -- latin capital letter E with acute, */
+                     "&Ecirc;",         "&#202;",       "E", /* -- latin capital letter E with circumflex, */
+                     "&Euml;",          "&#203;",       "E", /* -- latin capital letter E with diaeresis, */
+                     "&Igrave;",        "&#204;",       "I", /* -- latin capital letter I with grave, */
+                     "&Iacute;",        "&#205;",       "I", /* -- latin capital letter I with acute, */
+                     "&Icirc;",         "&#206;",       "I", /* -- latin capital letter I with circumflex, */
+                     "&Iuml;",          "&#207;",       "I", /* -- latin capital letter I with diaeresis, */
+                     "&ETH;",           "&#208;",       "E", /* -- latin capital letter ETH, U+00D0 ISOlat1 --> */
+                     "&Ntilde;",        "&#209;",       "N", /* -- latin capital letter N with tilde, */
+                     "&Ograve;",        "&#210;",       "O", /* -- latin capital letter O with grave, */
+                     "&Oacute;",        "&#211;",       "O", /* -- latin capital letter O with acute, */
+                     "&Ocirc;",         "&#212;",       "O", /* -- latin capital letter O with circumflex, */
+                     "&Otilde;",        "&#213;",       "O", /* -- latin capital letter O with tilde, */
+                     "&Ouml;",          "&#214;",       "O", /* -- latin capital letter O with diaeresis, */
+                     "&times;",         "&#215;",       "*", /* -- multiplication sign, U+00D7 ISOnum --> */
+                     "&Oslash;",        "&#216;",       "O", /* -- latin capital letter O with stroke */
+                     "&Ugrave;",        "&#217;",       "U", /* -- latin capital letter U with grave, */
+                     "&Uacute;",        "&#218;",       "U", /* -- latin capital letter U with acute, */
+                     "&Ucirc;",         "&#219;",       "U", /* -- latin capital letter U with circumflex, */
+                     "&Uuml;",          "&#220;",       "U", /* -- latin capital letter U with diaeresis, */
+                     "&Yacute;",        "&#221;",       "Y", /* -- latin capital letter Y with acute, */
+                     "&THORN;",         "&#222;",       "TH", /* -- latin capital letter THORN, */
+                     "&szlig;",         "&#223;",       "sz", /* -- latin small letter sharp s = ess-zed, */
+                     "&agrave;",        "&#224;",       "a", /* -- latin small letter a with grave */
+                     "&aacute;",        "&#225;",       "a", /* -- latin small letter a with acute, */
+                     "&acirc;",         "&#226;",       "a", /* -- latin small letter a with circumflex, */
+                     "&atilde;",        "&#227;",       "a", /* -- latin small letter a with tilde, */
+                     "&auml;",          "&#228;",       "a", /* -- latin small letter a with diaeresis, */
+                     "&aring;",         "&#229;",       "a", /* -- latin small letter a with ring above */
+                     "&aelig;",         "&#230;",       "ae", /* -- latin small letter ae */
+                     "&ccedil;",        "&#231;",       "c", /* -- latin small letter c with cedilla, */
+                     "&egrave;",        "&#232;",       "e", /* -- latin small letter e with grave, */
+                     "&eacute;",        "&#233;",       "e", /* -- latin small letter e with acute, */
+                     "&ecirc;",         "&#234;",       "e", /* -- latin small letter e with circumflex, */
+                     "&euml;",          "&#235;",       "e", /* -- latin small letter e with diaeresis, */
+                     "&igrave;",        "&#236;",       "i", /* -- latin small letter i with grave, */
+                     "&iacute;",        "&#237;",       "i", /* -- latin small letter i with acute, */
+                     "&icirc;",         "&#238;",       "i", /* -- latin small letter i with circumflex, */
+                     "&iuml;",          "&#239;",       "i", /* -- latin small letter i with diaeresis, */
+                     "&eth;",           "&#240;",       "eth", /* -- latin small letter eth, U+00F0 ISOlat1 --> */
+                     "&ntilde;",        "&#241;",       "n", /* -- latin small letter n with tilde, */
+                     "&ograve;",        "&#242;",       "o", /* -- latin small letter o with grave, */
+                     "&oacute;",        "&#243;",       "o", /* -- latin small letter o with acute, */
+                     "&ocirc;",         "&#244;",       "o", /* -- latin small letter o with circumflex, */
+                     "&otilde;",        "&#245;",       "o", /* -- latin small letter o with tilde, */
+                     "&ouml;",          "&#246;",       "o", /* -- latin small letter o with diaeresis, */
+                     "&divide;",        "&#247;",       "/", /* -- division sign, U+00F7 ISOnum --> */
+                     "&oslash;",        "&#248;",       "o", /* -- latin small letter o with stroke, */
+                     "&ugrave;",        "&#249;",       "u", /* -- latin small letter u with grave, */
+                     "&uacute;",        "&#250;",       "u", /* -- latin small letter u with acute, */
+                     "&ucirc;",         "&#251;",       "u", /* -- latin small letter u with circumflex, */
+                     "&uuml;",          "&#252;",       "u", /* -- latin small letter u with diaeresis, */
+                     "&yacute;",        "&#253;",       "y", /* -- latin small letter y with acute, */
+                     "&thorn;",         "&#254;",       "th", /* -- latin small letter thorn, */
+                     "&yuml;",          "&#255;",       "y", /* -- latin small letter y with diaeresis, */
+                      "", "" };
+                    
+/* ---- list of special characters (numeric values are ASCII codes) ---- */
+#define CHAR_SPACE        32    /* ' '  */
+#define CHAR_TAB           9    /* '\t' */
+#define CHAR_LF           10    /* '\n' */
+#define CHAR_CR           13    /* '\r' */
+#define CHAR_DQUOTE       34    /* '"'  */
+#define CHAR_SQUOTE       39    /* '\'' */
+#define CHAR_OPEN_SQUOTE  96    /* '`'  */
+#define CHAR_TILDE       126    /* '~'  */
+#define CHAR_ASTERISK     42    /* '*'  */
+#define CHAR_FORESLASH    47    /* '/'  */
+#define CHAR_CARAT        94    /* '^'  */
+
+#define CHAR_UNDERSCORE    '_'
+#define CHAR_OPEN_CBRACK   '{'
+#define CHAR_CLOSE_CBRACK  '}'
+#define CHAR_OPEN_RBRACK   '('
+#define CHAR_CLOSE_RBRACK  ')'
+#define CHAR_OPEN_SBRACK   '['
+#define CHAR_CLOSE_SBRACK  ']'
+
+
+
+
+
+/* ---- longest and shortest normal PG line lengths ----*/
+#define LONGEST_PG_LINE   75
+#define WAY_TOO_LONG      80
+#define SHORTEST_PG_LINE  55
+
+#define SWITCHES "ESTPXLOYHWVMUD" /* switches:-                            */
+                                  /*     D - ignore DP-specific markup     */
+                                  /*     E - echo queried line             */
+                                  /*     S - check single quotes           */
+                                  /*     T - check common typos            */
+                                  /*     P - require closure of quotes on  */
+                                  /*         every paragraph               */
+                                  /*     X - "Trust no one" :-) Paranoid!  */
+                                  /*         Queries everything            */
+                                  /*     L - line end checking defaults on */
+                                  /*         -L turns it off               */
+                                  /*     O - overview. Just shows counts.  */
+                                  /*     Y - puts errors to stdout         */
+                                  /*         instead of stderr             */
+                                  /*     H - Echoes header fields          */
+                                  /*     M - Ignore markup in < >          */
+                                  /*     U - Use file of User-defined Typos*/
+                                  /*     W - Defaults for use on Web upload*/
+                                  /*     V - Verbose - list EVERYTHING!    */
+#define SWITNO 14                 /* max number of switch parms            */
+                                  /*        - used for defining array-size */
+#define MINARGS   1               /* minimum no of args excl switches      */
+#define MAXARGS   1               /* maximum no of args excl switches      */
+
+int pswit[SWITNO];                /* program switches set by SWITCHES      */
+/* Indices into pswit[]: each must equal the position of its letter in SWITCHES. */
+#define ECHO_SWITCH      0        /* E */
+#define SQUOTE_SWITCH    1        /* S */
+#define TYPO_SWITCH      2        /* T */
+#define QPARA_SWITCH     3        /* P */
+#define PARANOID_SWITCH  4        /* X */
+#define LINE_END_SWITCH  5        /* L */
+#define OVERVIEW_SWITCH  6        /* O */
+#define STDOUT_SWITCH    7        /* Y */
+#define HEADER_SWITCH    8        /* H */
+#define WEB_SWITCH       9        /* W */
+#define VERBOSE_SWITCH   10       /* V */
+#define MARKUP_SWITCH    11       /* M */
+#define USERTYPO_SWITCH  12       /* U */
+#define DP_SWITCH        13       /* D */
+
+
+/* ---- query counters and line counters ---- */
+long cnt_dquot;       /* for overview mode, count of doublequote queries */
+long cnt_squot;       /* for overview mode, count of singlequote queries */
+long cnt_brack;       /* for overview mode, count of brackets queries */
+long cnt_bin;         /* for overview mode, count of non-ASCII queries */
+long cnt_odd;         /* for overview mode, count of odd character queries */
+long cnt_long;        /* for overview mode, count of long line errors */
+long cnt_short;       /* for overview mode, count of short line queries */
+long cnt_punct;       /* for overview mode, count of punctuation and spacing queries */
+long cnt_dash;        /* for overview mode, count of dash-related queries */
+long cnt_word;        /* for overview mode, count of word queries */
+long cnt_html;        /* for overview mode, count of html queries */
+long cnt_lineend;     /* for overview mode, count of line-end queries */
+long cnt_spacend;     /* count of lines with space at end  V .21 */
+long linecnt;         /* count of total lines in the file */
+long checked_linecnt; /* count of lines actually gutchecked V .26 */
+
+void proghelp(void);
+void procfile(char *);
+
+#define LOW_THRESHOLD    0
+#define HIGH_THRESHOLD   1
+
+#define START 0
+#define END 1
+#define PREV 0
+#define NEXT 1
+#define FIRST_OF_PAIR 0
+#define SECOND_OF_PAIR 1
+
+#define MAX_WORDPAIR 1000
+/* Directory the program was started from; main() derives it from argv[0]. */
+char running_from[MAX_PATH];
+
+int mixdigit(char *);
+char *getaword(char *, char *);
+int matchword(char *, char *);
+char *flgets(char *, int, FILE *, long);
+void lowerit(char *);
+int gcisalpha(unsigned char);
+int gcisdigit(unsigned char);
+int gcisletter(unsigned char);
+char *gcstrchr(char *s, char c);
+void postprocess_for_HTML(char *);
+char *linehasmarkup(char *);
+char *losemarkup(char *);
+int tagcomp(char *, char *);
+char *loseentities(char *);
+int isroman(char *);
+int usertypo_count;    /* number of entries main() has stored in usertypo[] */
+void postprocess_for_DP(char *);
+/* NOTE(review): presumably a scratch buffer for one line - confirm against its users. */
+char wrk[LINEBUFSIZE];
+
+/* This is disgustingly lazy, predefining max words & lengths,   */
+/* but now I'm out of 16-bit restrictions, what's a couple of K? */
+#define MAX_QWORD           50
+#define MAX_QWORD_LENGTH    40
+char qword[MAX_QWORD][MAX_QWORD_LENGTH];
+char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
+signed int dupcnt[MAX_QWORD];
+
+
+
+
+int main(int argc, char **argv)
+{
+    char *argsw, *s;
+    int i, switno, invarg;
+    char usertypo_file[MAX_PATH];
+    FILE *usertypofile;
+
+
+    if (strlen(argv[0]) < sizeof(running_from))
+        strcpy(running_from, argv[0]);  /* save the path to the executable gutcheck */
+
+    /* find out what directory we're running from */
+    for (s = running_from + strlen(running_from); *s != '/' && *s != '\\' && s >= running_from; s--)
+        *s = 0;
+
+
+    switno = strlen(SWITCHES);
+    for (i = switno ; --i >0 ; )
+        pswit[i] = 0;           /* initialise switches */
+
+    /* Standard loop to extract switches.                   */
+    /* When we come out of this loop, the arguments will be */
+    /* in argv[0] upwards and the switches used will be     */
+    /* represented by their equivalent elements in pswit[]  */
+    while ( --argc > 0 && **++argv == '-')
+        for (argsw = argv[0]+1; *argsw !='\0'; argsw++)
+            for (i = switno, invarg = 1; (--i >= 0) && invarg == 1 ; )
+                if ((toupper(*argsw)) == SWITCHES[i] ) {
+                    invarg = 0;
+                    pswit[i] = 1;
+                    }
+
+    pswit[PARANOID_SWITCH] ^= 1;         /* Paranoid checking is turned OFF, not on, by its switch */
+
+    if (pswit[PARANOID_SWITCH]) {                         /* if running in paranoid mode */
+        pswit[TYPO_SWITCH] = pswit[TYPO_SWITCH] ^ 1;      /* force typo checks as well   */
+        }                                                 /* v.20 removed s and p switches from paranoid mode */
+
+    pswit[LINE_END_SWITCH] ^= 1;         /* Line-end checking is turned OFF, not on, by its switch */
+    pswit[ECHO_SWITCH] ^= 1;             /* V.21 Echoing is turned OFF, not on, by its switch      */
+
+    if (pswit[OVERVIEW_SWITCH])       /* just print summary; don't echo */
+        pswit[ECHO_SWITCH] = 0;
+
+    /* Web uploads - for the moment, this is really just a placeholder     */
+    /* until we decide what processing we really want to do on web uploads */
+    if (pswit[WEB_SWITCH]) {          /* specific override for web uploads */
+        pswit[ECHO_SWITCH] =     1;
+        pswit[SQUOTE_SWITCH] =   0;
+        pswit[TYPO_SWITCH] =     1;
+        pswit[QPARA_SWITCH] =    0;
+        pswit[PARANOID_SWITCH] = 1;
+        pswit[LINE_END_SWITCH] = 0;
+        pswit[OVERVIEW_SWITCH] = 0;
+        pswit[STDOUT_SWITCH] =   0;
+        pswit[HEADER_SWITCH] =   1;
+        pswit[VERBOSE_SWITCH] =  0;
+        pswit[MARKUP_SWITCH] =   0;
+        pswit[USERTYPO_SWITCH] = 0;
+        pswit[DP_SWITCH] = 0;
+        }
+
+
+    if (argc < MINARGS || argc > MAXARGS) {  /* check number of args */
+        proghelp();
+        return(1);            /* exit */
+        }
+
+
+    /* read in the user-defined stealth scanno list */
+
+    if (pswit[USERTYPO_SWITCH]) {                    /* ... we were told we had one! */
+        if ((usertypofile = fopen(USERTYPO_FILE, "rb")) == NULL) {   /* not in cwd. try gutcheck directory. */
+            strcpy(usertypo_file, running_from);
+            strcat(usertypo_file, USERTYPO_FILE);
+            if ((usertypofile = fopen(usertypo_file, "rb")) == NULL) {  /* we ain't got no user typo file! */
+                printf("   --> I couldn't find gutcheck.typ -- proceeding without user typos.\n");
+                }
+            }
+
+        usertypo_count = 0;
+        if (usertypofile) {  /* we managed to open a User Typo File! */
+            if (pswit[USERTYPO_SWITCH]) {
+                while (flgets(aline, LINEBUFSIZE-1, usertypofile, (long)usertypo_count)) {
+                    if (strlen(aline) > 1) {
+                        if ((int)*aline > 33) {
+                            s = malloc(strlen(aline)+1);
+                            if (!s) {
+                                fprintf(stderr, "gutcheck: cannot get enough memory for user typo file!!\n");
+                                exit(1);
+                                }
+                            strcpy(s, aline);
+                            usertypo[usertypo_count] = s;
+                            usertypo_count++;
+                            if (usertypo_count >= MAX_USER_TYPOS) {
+                                printf("   --> Only %d user-defined typos allowed: ignoring the rest\n");
+                                break;
+                                }
+                            }
+                        }
+                    }
+                }
+            fclose(usertypofile);
+            }
+        }
+
+
+
+
+    fprintf(stderr, "gutcheck: Check and report on an e-text\n");
+
+    cnt_dquot = cnt_squot = cnt_brack = cnt_bin = cnt_odd = cnt_long =
+    cnt_short = cnt_punct = cnt_dash = cnt_word = cnt_html = cnt_lineend =
+    cnt_spacend = 0;
+
+    procfile(argv[0]);
+
+    if (pswit[OVERVIEW_SWITCH]) {
+                         printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
+                            checked_linecnt, linecnt, linecnt - checked_linecnt);
+                         printf("    --------------- Queries found --------------\n");
+        if (cnt_long)    printf("    Long lines:                             %5ld\n",cnt_long);
+        if (cnt_short)   printf("    Short lines:                            %5ld\n",cnt_short);
+        if (cnt_lineend) printf("    Line-end problems:                      %5ld\n",cnt_lineend);
+        if (cnt_word)    printf("    Common typos:                           %5ld\n",cnt_word);
+        if (cnt_dquot)   printf("    Unmatched quotes:                       %5ld\n",cnt_dquot);
+        if (cnt_squot)   printf("    Unmatched SingleQuotes:                 %5ld\n",cnt_squot);
+        if (cnt_brack)   printf("    Unmatched brackets:                     %5ld\n",cnt_brack);
+        if (cnt_bin)     printf("    Non-ASCII characters:                   %5ld\n",cnt_bin);
+        if (cnt_odd)     printf("    Proofing characters:                    %5ld\n",cnt_odd);
+        if (cnt_punct)   printf("    Punctuation & spacing queries:          %5ld\n",cnt_punct);
+        if (cnt_dash)    printf("    Non-standard dashes:                    %5ld\n",cnt_dash);
+        if (cnt_html)    printf("    Possible HTML tags:                     %5ld\n",cnt_html);
+        printf("\n");
+        printf("    TOTAL QUERIES                           %5ld\n",
+            cnt_dquot + cnt_squot + cnt_brack + cnt_bin + cnt_odd + cnt_long +
+            cnt_short + cnt_punct + cnt_dash + cnt_word + cnt_html + cnt_lineend);
+        }
+
+    return(0);
+}
+
+
+
+/* procfile - process one file: a statistics-gathering first pass, then the main checking pass */
+
+void procfile(char *filename)
+{
+
+    char *s, *t, *s1, laststart, *wordstart;
+    char inword[MAXWORDLEN], testword[MAXWORDLEN];
+    char parastart[81];     /* first line of current para */
+    FILE *infile;
+    long quot, squot, firstline, alphalen, totlen, binlen,
+         shortline, longline, verylongline, spacedash, emdash,
+         space_emdash, non_PG_space_emdash, PG_space_emdash,
+         footerline, dotcomma, start_para_line, astline, fslashline,
+         standalone_digit, hyphens, htmcount, endquote_count;
+    long spline, nspline;
+    signed int i, j, llen, isemptyline, isacro, isellipsis, istypo, alower,
+         eNon_A, eTab, eTilde, eAst, eFSlash, eCarat;
+    signed int warn_short, warn_long, warn_bin, warn_dash, warn_dotcomma,
+         warn_ast, warn_fslash, warn_digit, warn_hyphen, warn_endquote;
+    unsigned int lastlen, lastblen;
+    signed int s_brack, c_brack, r_brack, c_unders;
+    signed int open_single_quote, close_single_quote, guessquote, dquotepar, squotepar;
+    signed int isnewpara, vowel, consonant;
+    char dquote_err[80], squote_err[80], rbrack_err[80], sbrack_err[80], cbrack_err[80],
+         unders_err[80];
+    signed int qword_index, qperiod_index, isdup;
+    signed int enddash;
+    signed int Dutchcount, isDutch, Frenchcount, isFrench;
+
+
+    
+
+
+    laststart = CHAR_SPACE;
+    lastlen = lastblen = 0;
+    *dquote_err = *squote_err = *rbrack_err = *cbrack_err = *sbrack_err =
+        *unders_err = *prevline = 0;
+    linecnt = firstline = alphalen = totlen = binlen =
+        shortline = longline = spacedash = emdash = checked_linecnt =
+        space_emdash = non_PG_space_emdash = PG_space_emdash =
+        footerline = dotcomma = start_para_line = astline = fslashline = 
+        standalone_digit = hyphens = htmcount = endquote_count = 0;
+    quot = squot = s_brack = c_brack = r_brack = c_unders = 0;
+    i = llen = isemptyline = isacro = isellipsis = istypo = 0;
+    warn_short = warn_long = warn_bin = warn_dash = warn_dotcomma = 
+        warn_ast = warn_fslash = warn_digit = warn_endquote = 0;
+    isnewpara = vowel = consonant = enddash = 0;
+    spline = nspline = 0;
+    qword_index = qperiod_index = isdup = 0;
+    *inword = *testword = 0;
+    open_single_quote = close_single_quote = guessquote = dquotepar = squotepar = 0;
+    Dutchcount = isDutch = Frenchcount = isFrench = 0;
+
+
+    for (j = 0; j < MAX_QWORD; j++) {
+        dupcnt[j] = 0;
+        for (i = 0; i < MAX_QWORD_LENGTH; i++)
+            qword[i][j] = 0;
+            qperiod[i][j] = 0;
+            }
+
+
+    if ((infile = fopen(filename, "rb")) == NULL) {
+        if (pswit[STDOUT_SWITCH])
+            fprintf(stdout, "gutcheck: cannot open %s\n", filename);
+        else
+            fprintf(stderr, "gutcheck: cannot open %s\n", filename);
+        exit(1);
+        }
+
+    fprintf(stdout, "\n\nFile: %s\n\n", filename);
+    firstline = shortline = longline = verylongline = 0;
+
+
+    /*****************************************************/
+    /*                                                   */
+    /*  Run a first pass - verify that it's a valid PG   */
+    /*  file, decide whether to report some things that  */
+    /*  occur many times in the text like long or short  */
+    /*  lines, non-standard dashes, and other good stuff */
+    /*  I'll doubtless think of later.                   */
+    /*                                                   */
+    /*****************************************************/
+
+    /*****************************************************/
+    /* V.24  Sigh. Yet Another Header Change             */
+    /*****************************************************/
+
+    while (fgets(aline, LINEBUFSIZE-1, infile)) {
+        while (aline[strlen(aline)-1] == 10 || aline[strlen(aline)-1] == 13 ) aline[strlen(aline)-1] = 0;
+        linecnt++;
+        if (strstr(aline, "*END") && strstr(aline, "SMALL PRINT") && (strstr(aline, "PUBLIC DOMAIN") || strstr(aline, "COPYRIGHT"))) {
+            if (spline)
+                printf("   --> Duplicate header?\n");
+            spline = linecnt + 1;   /* first line of non-header text, that is */
+            }
+        if (!strncmp(aline, "*** START", 9) && strstr(aline, "PROJECT GUTENBERG")) {
+            if (nspline)
+                printf("   --> Duplicate header?\n");
+            nspline = linecnt + 1;   /* first line of non-header text, that is */
+            }
+        if (spline || nspline) {
+            lowerit(aline);
+            if (strstr(aline, "end") && strstr(aline, "project gutenberg")) {
+                if (strstr(aline, "end") < strstr(aline, "project gutenberg")) {
+                    if (footerline) {
+                        if (!nspline) /* it's an old-form header - we can detect duplicates */
+                            printf("   --> Duplicate footer?\n");
+                        else 
+                            ;
+                        }
+                    else {
+                        footerline = linecnt;
+                        }
+                    }
+                }
+            }
+        if (spline) firstline = spline;
+        if (nspline) firstline = nspline;  /* override with new */
+
+        if (footerline) continue;    /* 0.99+ don't count the boilerplate in the footer */
+
+        llen = strlen(aline);
+        totlen += llen;
+        for (i = 0; i < llen; i++) {
+            if ((unsigned char)aline[i] > 127) binlen++;
+            if (gcisalpha(aline[i])) alphalen++;
+            if (i > 0)
+                if (aline[i] == CHAR_DQUOTE && isalpha(aline[i-1]))
+                    endquote_count++;
+            }
+        if (strlen(aline) > 2
+            && lastlen > 2 && lastlen < SHORTEST_PG_LINE
+            && lastblen > 2 && lastblen > SHORTEST_PG_LINE
+            && laststart != CHAR_SPACE)
+                shortline++;
+
+        if (*aline) /* fixed line below for 0.96 */
+            if ((unsigned char)aline[strlen(aline)-1] <= CHAR_SPACE) cnt_spacend++;
+
+        if (strstr(aline, ".,")) dotcomma++;
+        /* 0.98 only count ast lines for ignoring purposes where there is */
+        /* locase text on the line */
+        if (strstr(aline, "*")) {
+            for (s = aline; *s; s++)
+                if (*s >='a' && *s <= 'z')
+                    break;
+             if (*s) astline++;
+             }
+        if (strstr(aline, "/"))
+            fslashline++;
+        for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
+        if (aline[i] == '-' && aline[i-1] != '-') hyphens++;
+
+        if (llen > LONGEST_PG_LINE) longline++;
+        if (llen > WAY_TOO_LONG) verylongline++;
+
+        if (strstr(aline, "<") && strstr(aline, ">")) {
+            i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
+            if (i > 0) 
+                htmcount++;
+            if (strstr(aline, "<i>")) htmcount +=4; /* bonus marks! */
+            }
+
+        /* Check for spaced em-dashes */
+        if (strstr(aline,"--")) {
+            emdash++;
+            if (*(strstr(aline, "--")-1) == CHAR_SPACE ||
+               (*(strstr(aline, "--")+2) == CHAR_SPACE))
+                    space_emdash++;
+            if (*(strstr(aline, "--")-1) == CHAR_SPACE &&
+               (*(strstr(aline, "--")+2) == CHAR_SPACE))
+                    non_PG_space_emdash++;             /* count of em-dashes with spaces both sides */
+            if (*(strstr(aline, "--")-1) != CHAR_SPACE &&
+               (*(strstr(aline, "--")+2) != CHAR_SPACE))
+                    PG_space_emdash++;                 /* count of PG-type em-dashes with no spaces */
+            }
+
+        for (s = aline; *s;) {
+            s = getaword(s, inword);
+            if (!strcmp(inword, "hij") || !strcmp(inword, "niet")) 
+                Dutchcount++;
+            if (!strcmp(inword, "dans") || !strcmp(inword, "avec")) 
+                Frenchcount++;
+            if (!strcmp(inword, "0") || !strcmp(inword, "1")) 
+                standalone_digit++;
+            }
+
+        /* Check for spaced dashes */
+        if (strstr(aline," -"))
+            if (*(strstr(aline, " -")+2) != '-')
+                    spacedash++;
+        lastblen = lastlen;
+        lastlen = strlen(aline);
+        laststart = aline[0];
+
+        }
+    fclose(infile);
+
+
+    /* now, based on this quick view, make some snap decisions */
+    if (cnt_spacend > 0) {
+        printf("   --> %ld lines in this file have white space at end\n", cnt_spacend);
+        }
+
+    warn_dotcomma = 1;
+    if (dotcomma > 5) {
+        warn_dotcomma = 0;
+        printf("   --> %ld lines in this file contain '.,'. Not reporting them.\n", dotcomma);
+        }
+
+    /* if more than 50 lines, or one-tenth, are short, don't bother reporting them */
+    warn_short = 1;
+    if (shortline > 50 || shortline * 10 > linecnt) {
+        warn_short = 0;
+        printf("   --> %ld lines in this file are short. Not reporting short lines.\n", shortline);
+        }
+
+    /* if more than 50 lines, or one-tenth, are long, don't bother reporting them */
+    warn_long = 1;
+    if (longline > 50 || longline * 10 > linecnt) {
+        warn_long = 0;
+        printf("   --> %ld lines in this file are long. Not reporting long lines.\n", longline);
+        }
+
+    /* if more than 10 lines contain asterisks, don't bother reporting them V.0.97 */
+    warn_ast = 1;
+    if (astline > 10 ) {
+        warn_ast = 0;
+        printf("   --> %ld lines in this file contain asterisks. Not reporting them.\n", astline);
+        }
+
+    /* if more than 10 lines contain forward slashes, don't bother reporting them V.0.99 */
+    warn_fslash = 1;
+    if (fslashline > 10 ) {
+        warn_fslash = 0;
+        printf("   --> %ld lines in this file contain forward slashes. Not reporting them.\n", fslashline);
+        }
+
+    /* if more than 20 lines contain unpunctuated endquotes, don't bother reporting them V.0.99 */
+    warn_endquote = 1;
+    if (endquote_count > 20 ) {
+        warn_endquote = 0;
+        printf("   --> %ld lines in this file contain unpunctuated endquotes. Not reporting them.\n", endquote_count);
+        }
+
+    /* if more than 10 standalone 0s or 1s are found, don't bother reporting them V.0.97 */
+    warn_digit = 1;
+    if (standalone_digit > 10 ) {
+        warn_digit = 0;
+        printf("   --> %ld lines in this file contain standalone 0s and 1s. Not reporting them.\n", standalone_digit);
+        }
+
+    /* if more than 20 lines contain hyphens at end, don't bother reporting them V.0.98 */
+    warn_hyphen = 1;
+    if (hyphens > 20 ) {
+        warn_hyphen = 0;
+        printf("   --> %ld lines in this file have hyphens at end. Not reporting them.\n", hyphens);
+        }
+
+    if (htmcount > 20 && !pswit[MARKUP_SWITCH]) {
+        printf("   --> Looks like this is HTML. Switching HTML mode ON.\n");
+        pswit[MARKUP_SWITCH] = 1;
+        }
+        
+    if (verylongline > 0) {
+        printf("   --> %ld lines in this file are VERY long!\n", verylongline);
+        }
+
+    /* If there are more non-PG spaced dashes than PG em-dashes,    */
+    /* assume it's deliberate                                       */
+    /* Current PG guidelines say don't use them, but older texts do,*/
+    /* and some people insist on them whatever the guidelines say.  */
+    /* V.20 removed requirement that PG_space_emdash be greater than*/
+    /* ten before turning off warnings about spaced dashes.         */
+    warn_dash = 1;
+    if (spacedash + non_PG_space_emdash > PG_space_emdash) {
+        warn_dash = 0;
+        printf("   --> There are %ld spaced dashes and em-dashes. Not reporting them.\n", spacedash + non_PG_space_emdash);
+        }
+
+    /* if more than a quarter of characters are hi-bit, bug out */
+    warn_bin = 1;
+    if (binlen * 4 > totlen) {
+        printf("   --> This file does not appear to be ASCII. Terminating. Best of luck with it!\n");
+        exit(1);
+        }
+    if (alphalen * 4 < totlen) {
+        printf("   --> This file does not appear to be text. Terminating. Best of luck with it!\n");
+        exit(1);
+        }
+    if ((binlen * 100 > totlen) || (binlen > 100)) {
+        printf("   --> There are a lot of foreign letters here. Not reporting them.\n");
+        warn_bin = 0;
+        }
+
+    /* isDutch and isFrench added .991 Feb 06 for Frank, Jeroen, Renald */
+    isDutch = 0;
+    if (Dutchcount > 50) {
+        isDutch = 1;
+        printf("   --> This looks like Dutch - switching off dashes and warnings for 's Middags case.\n");
+        }
+
+    isFrench = 0;
+    if (Frenchcount > 50) {
+        isFrench = 1;
+        printf("   --> This looks like French - switching off some doublepunct.\n");
+        }
+
+    if (firstline && footerline)
+        printf("    The PG header and footer appear to be already on.\n");
+    else {
+        if (firstline)
+            printf("    The PG header is on - no footer.\n");
+        if (footerline)
+            printf("    The PG footer is on - no header.\n");
+        }
+    printf("\n");
+
+    /* V.22 George Davis asked for an override switch to force it to list everything */
+    if (pswit[VERBOSE_SWITCH]) {
+        warn_bin = 1;
+        warn_short = 1;
+        warn_dotcomma = 1;
+        warn_long = 1;
+        warn_dash = 1;
+        warn_digit = 1;
+        warn_ast = 1;
+        warn_fslash = 1;
+        warn_hyphen = 1;
+        warn_endquote = 1;
+        printf("   *** Verbose output is ON -- you asked for it! ***\n");
+        }
+
+    if (isDutch)
+        warn_dash = 0;  /* Frank suggested turning it REALLY off for Dutch */
+
+    if ((infile = fopen(filename, "rb")) == NULL) {
+        if (pswit[STDOUT_SWITCH])
+            fprintf(stdout, "gutcheck: cannot open %s\n", filename);
+        else
+            fprintf(stderr, "gutcheck: cannot open %s\n", filename);
+        exit(1);
+        }
+
+    if (footerline > 0 && firstline > 0 && footerline > firstline && footerline - firstline < 100) { /* ugh */
+        printf("   --> I don't really know where this text starts. \n");
+        printf("       There are no reference points.\n");
+        printf("       I'm going to have to report the header and footer as well.\n");
+        firstline=0;
+        }
+        
+
+
+    /*****************************************************/
+    /*                                                   */
+    /* Here we go with the main pass. Hold onto yer hat! */
+    /*                                                   */
+    /*****************************************************/
+
+    /* Re-init some variables we've dirtied */
+    quot = squot = linecnt = 0;
+    laststart = CHAR_SPACE;
+    lastlen = lastblen = 0;
+
+    while (flgets(aline, LINEBUFSIZE-1, infile, linecnt+1)) {
+        linecnt++;
+        if (linecnt == 1) isnewpara = 1;
+        if (pswit[DP_SWITCH])
+            if (!strncmp(aline, "-----File: ", 11))
+                continue;    // skip DP page separators completely
+        if (linecnt < firstline || (footerline > 0 && linecnt > footerline)) {
+            if (pswit[HEADER_SWITCH]) {
+                if (!strncmp(aline, "Title:", 6))
+                    printf("    %s\n", aline);
+                if (!strncmp (aline, "Author:", 7))
+                    printf("    %s\n", aline);
+                if (!strncmp(aline, "Release Date:", 13))
+                    printf("    %s\n", aline);
+                if (!strncmp(aline, "Edition:", 8))
+                    printf("    %s\n\n", aline);
+                }
+            continue;                /* skip through the header */
+            }
+        checked_linecnt++;
+        s = aline;
+        isemptyline = 1;      /* assume the line is empty until proven otherwise */
+
+        /* If we are in a state of unbalanced quotes, and this line    */
+        /* doesn't begin with a quote, output the stored error message */
+        /* If the -P switch was used, print the warning even if the    */
+        /* new para starts with quotes                                 */
+        /* Version .20 - if the new paragraph does start with a quote, */
+        /* but is indented, I was giving a spurious error. Need to     */
+        /* check the first _non-space_ character on the line rather    */
+        /* than the first character when deciding whether the para     */
+        /* starts with a quote. Using *t for this.                     */
+        t = s;
+        while (*t == ' ') t++;
+        if (*dquote_err)
+            if (*t != CHAR_DQUOTE || pswit[QPARA_SWITCH]) {
+                if (!pswit[OVERVIEW_SWITCH]) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
+                    printf(dquote_err);
+                    }
+                else
+                    cnt_dquot++;
+            }
+        if (*squote_err) {
+            if (*t != CHAR_SQUOTE && *t != CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] || squot) {
+                if (!pswit[OVERVIEW_SWITCH]) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
+                    printf(squote_err);
+                    }
+                else
+                    cnt_squot++;
+                }
+            squot = 0;
+            }
+        if (*rbrack_err) {
+            if (!pswit[OVERVIEW_SWITCH]) {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
+                printf(rbrack_err);
+                }
+            else
+                cnt_brack++;
+            }
+        if (*sbrack_err) {
+            if (!pswit[OVERVIEW_SWITCH]) {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
+                printf(sbrack_err);
+                }
+            else
+                cnt_brack++;
+            }
+        if (*cbrack_err) {
+            if (!pswit[OVERVIEW_SWITCH]) {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
+                printf(cbrack_err);
+                }
+            else
+                cnt_brack++;
+            }
+        if (*unders_err) {
+            if (!pswit[OVERVIEW_SWITCH]) {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
+                printf(unders_err);
+                }
+            else
+                cnt_brack++;
+            }
+
+        *dquote_err = *squote_err = *rbrack_err = *cbrack_err = 
+            *sbrack_err = *unders_err = 0;
+
+
+        /* look along the line, accumulate the count of quotes, and see */
+        /* if this is an empty line - i.e. a line with nothing on it    */
+        /* but spaces.                                                  */
+        /* V .12 also if line has just spaces, * and/or - on it, don't  */
+        /* count it, since empty lines with asterisks or dashes to      */
+        /* separate sections are common.                                */
+        /* V .15 new single-quote checking - has to be better than the  */
+        /* previous version, but how much better? fingers crossed!      */
+        /* V .20 add period to * and - as characters on a separator line*/
+        s = aline;
+        while (*s) {
+            if (*s == CHAR_DQUOTE) quot++;
+            if (*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
+                if (s == aline) { /* at start of line, it can only be an openquote */
+                    if (strncmp(s+2, "tis", 3) && strncmp(s+2, "Tis", 3)) /* hardcode a very common exception! */
+                        open_single_quote++;
+                    }
+                else
+                    if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
+                        ; /* do nothing! - it's definitely an apostrophe, not a quote */
+                    else        /* it's outside a word - let's check it out */
+                        if (*s == CHAR_OPEN_SQUOTE || gcisalpha(*(s+1))) { /* it damwell better BE an openquote */
+                            if (strncmp(s+1, "tis", 3) && strncmp(s+1, "Tis", 3)) /* hardcode a very common exception! */
+                                open_single_quote++;
+                            }
+                        else { /* now - is it a closequote? */
+                            guessquote = 0;   /* accumulate clues */
+                            if (gcisalpha(*(s-1))) { /* it follows a letter - could be either */
+                                guessquote += 1;
+                                if (*(s-1) == 's') { /* looks like a plural apostrophe */
+                                    guessquote -= 3;
+                                    if (*(s+1) == CHAR_SPACE)  /* bonus marks! */
+                                        guessquote -= 2;
+                                    }
+                                }
+                            else /* it doesn't have a letter either side */
+                                if (strchr(".?!,;:", *(s-1)) && (strchr(".?!,;: ", *(s+1))))
+                                    guessquote += 8; /* looks like a closequote */
+                                else
+                                    guessquote += 1;
+                            if (open_single_quote > close_single_quote)
+                                guessquote += 1; /* give it the benefit of some doubt - if a squote is already open */
+                            else
+                                guessquote -= 1;
+                            if (guessquote >= 0)
+                                close_single_quote++;
+                            }
+
+            if (*s != CHAR_SPACE
+                && *s != '-'
+                && *s != '.'
+                && *s != CHAR_ASTERISK
+                && *s != 13
+                && *s != 10) isemptyline = 0;  /* ignore lines like  *  *  *  as spacers */
+            if (*s == CHAR_UNDERSCORE) c_unders++;
+            if (*s == CHAR_OPEN_CBRACK) c_brack++;
+            if (*s == CHAR_CLOSE_CBRACK) c_brack--;
+            if (*s == CHAR_OPEN_RBRACK) r_brack++;
+            if (*s == CHAR_CLOSE_RBRACK) r_brack--;
+            if (*s == CHAR_OPEN_SBRACK) s_brack++;
+            if (*s == CHAR_CLOSE_SBRACK) s_brack--;
+            s++;
+            }
+
+        if (isnewpara && !isemptyline) {   /* This line is the start of a new paragraph */
+            start_para_line = linecnt;
+            strncpy(parastart, aline, 80); /* Capture its first line in case we want to report it later */
+            parastart[79] = 0;
+            dquotepar = squotepar = 0; /* restart the quote count 0.98 */
+            s = aline;
+            while (!gcisalpha(*s) && !gcisdigit(*s) && *s) s++;    /* V.97 fixed bug - overran line and gave false warning - rare */
+            if (*s >= 'a' && *s <='z') { /* and its first letter is lowercase */
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column %d - Paragraph starts with lower-case\n", linecnt, (int)(s - aline) +1);
+                else
+                    cnt_punct++;
+                }
+            isnewpara = 0; /* Signal the end of new para processing */
+            }
+
+        /* Check for an em-dash broken at line end */
+        if (enddash && *aline == '-') {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column 1 - Broken em-dash?\n", linecnt);
+            else
+                cnt_punct++;
+            }
+        enddash = 0;
+        for (s = aline + strlen(aline) - 1; *s == ' ' && s > aline; s--);
+        if (s >= aline && *s == '-')
+            enddash = 1;
+            
+
+        /* Check for invalid or questionable characters in the line */
+        /* Anything above 127 is invalid for plain ASCII,  and      */
+        /* non-printable control characters should also be flagged. */
+        /* Tabs should generally not be there.                      */
+        /* Jan 06, in 0.99: Hm. For some strange reason, I either   */
+        /* never created or deleted the check for unprintable       */
+        /* control characters. They should be reported even if      */
+        /* warn_bin is on, I think, and in full.                    */
+
+        for (s = aline; *s; s++) {
+            i = (unsigned char) *s;
+            if (i < CHAR_SPACE && i != CHAR_LF && i != CHAR_CR && i != CHAR_TAB) {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column %d - Control character %d\n", linecnt, (int) (s - aline) + 1, i);
+                else
+                    cnt_bin++;
+                }
+            }
+
+        if (warn_bin) {
+            eNon_A = eTab = eTilde = eCarat = eFSlash = eAst = 0;  /* don't repeat multiple warnings on one line */
+            for (s = aline; *s; s++) {
+                if (!eNon_A && ((*s < CHAR_SPACE && *s != 9 && *s != '\n') || (unsigned char)*s > 127)) {
+                    i = *s;                           /* annoying kludge for signed chars */
+                    if (i < 0) i += 256;
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        if (i > 127 && i < 160)
+                            printf("    Line %ld column %d - Non-ISO-8859 character %d\n", linecnt, (int) (s - aline) + 1, i);
+                        else
+                            printf("    Line %ld column %d - Non-ASCII character %d\n", linecnt, (int) (s - aline) + 1, i);
+                    else
+                        cnt_bin++;
+                    eNon_A = 1;
+                    }
+                if (!eTab && *s == CHAR_TAB) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Tab character?\n", linecnt, (int) (s - aline) + 1);
+                    else
+                        cnt_odd++;
+                    eTab = 1;
+                    }
+                if (!eTilde && *s == CHAR_TILDE) {  /* often used by OCR software to indicate an unrecognizable character */
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Tilde character?\n", linecnt, (int) (s - aline) + 1);
+                    else
+                        cnt_odd++;
+                    eTilde = 1;
+                    }
+                if (!eCarat && *s == CHAR_CARAT) {  
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Carat character?\n", linecnt, (int) (s - aline) + 1);
+                    else
+                        cnt_odd++;
+                    eCarat = 1;
+                    }
+                if (!eFSlash && *s == CHAR_FORESLASH && warn_fslash) {  
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Forward slash?\n", linecnt, (int) (s - aline) + 1);
+                    else
+                        cnt_odd++;
+                    eFSlash = 1;
+                    }
+                /* report asterisks only in paranoid mode, since they're often deliberate */
+                if (!eAst && pswit[PARANOID_SWITCH] && warn_ast && !isemptyline && *s == CHAR_ASTERISK) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Asterisk?\n", linecnt, (int) (s - aline) + 1);
+                    else
+                        cnt_odd++;
+                    eAst = 1;
+                    }
+                }
+            }
+
+        /* Check for line too long */
+        if (warn_long) {
+            if (strlen(aline) > LONGEST_PG_LINE) {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column %d - Long line %d\n", linecnt, strlen(aline), strlen(aline));
+                else
+                    cnt_long++;
+                }
+            }
+
+        /* Check for line too short.                                     */
+        /* This one is a bit trickier to implement: we don't want to     */
+        /* flag the last line of a paragraph for being short, so we      */
+        /* have to wait until we know that our current line is a         */
+        /* "normal" line, then report the _previous_ line if it was too  */
+        /* short. We also don't want to report indented lines like       */
+        /* chapter heads or formatted quotations. We therefore keep      */
+        /* lastlen as the length of the last line examined, and          */
+        /* lastblen as the length of the last but one, and try to        */
+        /* suppress unnecessary warnings by checking that both were of   */
+        /* "normal" length. We keep the first character of the last      */
+        /* line in laststart, and if it was a space, we assume that the  */
+        /* formatting is deliberate. I can't figure out a way to         */
+        /* distinguish something like a quoted verse left-aligned or     */
+        /* the header or footer of a letter from a paragraph of short    */
+        /* lines - maybe if I examined the whole paragraph, and if the   */
+        /* para has less than, say, 8 lines and if all lines are short,  */
+        /* then just assume it's OK? Need to look at some texts to see   */
+        /* how often a formula like this would get the right result.     */
+        /* V0.99 changed the tolerance for length to ignore from 2 to 1  */
+        if (warn_short) {
+            if (strlen(aline) > 1
+                && lastlen > 1 && lastlen < SHORTEST_PG_LINE
+                && lastblen > 1 && lastblen > SHORTEST_PG_LINE
+                && laststart != CHAR_SPACE) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Short line %d?\n", linecnt-1, strlen(prevline), strlen(prevline));
+                    else
+                        cnt_short++;
+                    }
+            }
+        lastblen = lastlen;
+        lastlen = strlen(aline);
+        laststart = aline[0];
+
+        /* look for punctuation at start of line */
+        if  (*aline && strchr(".?!,;:",  aline[0]))  {            /* if it's punctuation */
+            if (strncmp(". . .", aline, 5)) {   /* exception for ellipsis: V.98 tightened up to except only a full ellipsis */
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column 1 - Begins with punctuation?\n", linecnt);
+                else
+                    cnt_punct++;
+                }
+            }
+
+        /* Check for spaced em-dashes                            */
+        /* V.20 must check _all_ occurrences of "--" on the line */
+        /* hence the loop - even if the first double-dash is OK  */
+        /* there may be another that's wrong later on.           */
+        if (warn_dash) {
+            s = aline;
+            while (strstr(s,"--")) {
+                if (*(strstr(s, "--")-1) == CHAR_SPACE ||
+                   (*(strstr(s, "--")+2) == CHAR_SPACE)) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Spaced em-dash?\n", linecnt, (int) (strstr(s,"--") - aline) + 1);
+                    else
+                        cnt_dash++;
+                    }
+                s = strstr(s,"--") + 2;
+                }
+            }
+
+        /* Check for spaced dashes */
+        if (warn_dash)
+            if (strstr(aline," -")) {
+                if (*(strstr(aline, " -")+2) != '-') {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline," -") - aline) + 1);
+                    else
+                        cnt_dash++;
+                    }
+                }
+            else
+                if (strstr(aline,"- ")) {
+                    if (*(strstr(aline, "- ")-1) != '-') {
+                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                        if (!pswit[OVERVIEW_SWITCH])
+                            printf("    Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline,"- ") - aline) + 1);
+                        else
+                            cnt_dash++;
+                        }
+                    }
+
+        /* v 0.99                                                       */
+        /* Check for unmarked paragraphs indicated by separate speakers */
+        /* May well be false positive:                                  */
+        /* "Bravo!" "Wonderful!" called the crowd.                      */
+        /* but useful all the same.                                     */
+        s = wrk;
+        *s = 0;
+        if (strstr(aline, "\" \"")) s = strstr(aline, "\" \"");
+        if (strstr(aline, "\"  \"")) s = strstr(aline, "\"  \"");
+        if (*s) {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column %d - Query missing paragraph break?\n", linecnt, (int)(s - aline) +1);
+            else
+                cnt_punct++;
+            }
+
+
+
+        /* Check for "to he" and other easy he/be errors          */
+        /* This is a very inadequate effort on the he/be problem, */
+        /* but the phrase "to he" is always an error, whereas "to */
+        /* be" is quite common. I chuckle when it does catch one! */
+        /* Similarly, '"Quiet!", be said.' is a non-be error      */
+        /* V .18 - "to he" is _not_ always an error!:             */
+        /*           "Where they went to he couldn't say."        */
+        /* but I'm leaving it in anyway.                          */
+        /* V .20 Another false positive:                          */
+        /*       What would "Cinderella" be without the . . .     */
+        /* and another "If he wants to he can see for himself."   */
+        /* V .21 Added " is be " and " be is " and " be was "     */
+        /* V .99 Added jeebies code -- removed again.             */
+        /*       Is jeebies code worth adding? Rare to see he/be  */
+        /*       errors with modern OCR. Separate program? Yes!   */
+        /*       jeebies does the job without cluttering up this. */
+        /*       We do get a few more queryable pairs from the    */
+        /*       project though -- they're cheap to implement.    */
+        /*       Also added a column number for guiguts.          */
+
+        s = wrk;
+        *s = 0;
+        if (strstr(aline," to he ")) s = strstr(aline," to he ");
+        if (strstr(aline,"\" be ")) s = strstr(aline,"\" be ");
+        if (strstr(aline,"\", be ")) s = strstr(aline,"\", be ");
+        if (strstr(aline," is be ")) s = strstr(aline," is be ");
+        if (strstr(aline," be is ")) s = strstr(aline," be is ");
+        if (strstr(aline," was be ")) s = strstr(aline," was be ");
+        if (strstr(aline," be would ")) s = strstr(aline," be would ");
+        if (strstr(aline," be could ")) s = strstr(aline," be could ");
+        if (*s) {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column %d - Query he/be error?\n", linecnt, (int)(s - aline) +1);
+            else
+                cnt_word++;
+            }
+
+        s = wrk;
+        *s = 0;
+        if (strstr(aline," i bad ")) s = strstr(aline," i bad ");
+        if (strstr(aline," you bad ")) s = strstr(aline," you bad ");
+        if (strstr(aline," he bad ")) s = strstr(aline," he bad ");
+        if (strstr(aline," she bad ")) s = strstr(aline," she bad ");
+        if (strstr(aline," they bad ")) s = strstr(aline," they bad ");
+        if (strstr(aline," a had ")) s = strstr(aline," a had ");
+        if (strstr(aline," the had ")) s = strstr(aline," the had ");
+        if (*s) {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column %d - Query had/bad error?\n", linecnt, (int)(s - aline) +1);
+            else
+                cnt_word++;
+            }
+
+
+        /* V .97 Added ", hut "  Not too common, hut pretty certain   */
+        /* V.99 changed to add a column number for guiguts            */
+        s = wrk;
+        *s = 0;
+        if (strstr(aline,", hut ")) s = strstr(aline,", hut ");
+        if (strstr(aline,"; hut ")) s = strstr(aline,"; hut ");
+        if (*s) {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column %d - Query hut/but error?\n", linecnt, (int)(s - aline) +1);
+            else
+                cnt_word++;
+            }
+
+        /* Special case - angled bracket in front of "From" placed there by an MTA */
+        /* when sending an e-mail.  V .21                                          */
+        if (strstr(aline, ">From")) {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column %d - Query angled bracket with From\n", linecnt, (int)(strstr(aline, ">From") - aline) +1);
+            else
+                cnt_punct++;
+            }
+
+        /* V 0.98 Check for a single character line - often an overflow from bad wrapping. */
+        if (*aline && !*(aline+1)) {
+            if (*aline == 'I' || *aline == 'V' || *aline == 'X' || *aline == 'L' || gcisdigit(*aline))
+                ; /* nothing - ignore numerals alone on a line. */
+            else {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column 1 - Query single character line\n", linecnt);
+                else
+                    cnt_punct++;
+                }
+            }
+
+        /* V 0.98 Check for I" - often should be ! */
+        if (strstr(aline, " I\"")) {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column %ld - Query I=exclamation mark?\n", linecnt, strstr(aline, " I\"") - aline);
+            else
+                cnt_punct++;
+            }
+
+        /* V 0.98 Check for period without a capital letter. Cut-down from gutspell */
+        /*        Only works when it happens on a single line.                      */
+
+        if (pswit[PARANOID_SWITCH])
+            for (t = s = aline; strstr(t,". ");) {
+                t = strstr(t, ". ");
+                if (t == s)  {
+                    t++;
+                    continue; /* start of line punctuation is handled elsewhere */
+                    }
+                if (!gcisalpha(*(t-1))) {
+                    t++;
+                    continue;
+                    }
+                if (isDutch) {  /* For Frank & Jeroen -- 's Middags case */
+                    if (*(t+2) == CHAR_SQUOTE &&
+                      *(t+3)>='a' && *(t+3)<='z' &&
+                      *(t+4) == CHAR_SPACE &&
+                      *(t+5)>='A' && *(t+5)<='Z') {
+                        t++;
+                        continue;
+                        }
+                      }
+                s1 = t+2;
+                while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))
+                    s1++;
+                if (*s1 >= 'a' && *s1 <= 'z') {  /* we have something to investigate */
+                    istypo = 1;
+                    for (s1 = t - 1; s1 >= s && 
+                        (gcisalpha(*s1) || gcisdigit(*s1) || 
+                        (*s1 == CHAR_SQUOTE && gcisalpha(*(s1+1)) && gcisalpha(*(s1-1)))); s1--); /* so let's go back and find out */
+                    s1++;
+                    for (i = 0; *s1 && *s1 != '.'; s1++, i++)
+                        testword[i] = *s1;
+                    testword[i] = 0;
+                    for (i = 0; *abbrev[i]; i++)
+                        if (!strcmp(testword, abbrev[i]))
+                            istypo = 0;
+//                    if (*testword >= 'A' && *testword <= 'Z') 
+//                        istypo = 0;
+                    if (gcisdigit(*testword)) istypo = 0;
+                    if (!*(testword+1)) istypo = 0;
+                    if (isroman(testword)) istypo = 0;
+                    if (istypo) {
+                        istypo = 0;
+                        for (i = 0; testword[i]; i++)
+                            if (strchr(vowels, testword[i]))
+                                istypo = 1;
+                        }
+                    if (istypo) {
+                        isdup = 0;
+                        if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
+                            for (i = 0; i < qperiod_index; i++)
+                                if (!strcmp(testword, qperiod[i])) {
+                                    isdup = 1;
+                                    }
+                        if (!isdup) {
+                            if (qperiod_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
+                                strcpy(qperiod[qperiod_index], testword);
+                                qperiod_index++;
+                                }
+                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                            if (!pswit[OVERVIEW_SWITCH])
+                                printf("    Line %ld column %d - Extra period?\n", linecnt, (int)(t - aline)+1);
+                            else
+                                cnt_punct++;
+                            }
+                        }
+                    }
+                t++;
+                }
+
+
+        if (pswit[TYPO_SWITCH]) {    /* Should have put this condition in at the start of 0.99. Duh! */
+            /* Check for words usually not followed by punctuation 0.99 */
+            for (s = aline; *s;) {
+                wordstart = s;
+                s = getaword(s, inword);
+                if (!*inword) continue;
+                lowerit(inword);
+                for (i = 0; *nocomma[i]; i++)
+                    if (!strcmp(inword, nocomma[i])) {
+                        if (*s == ',' || *s == ';' || *s == ':') {
+                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                            if (!pswit[OVERVIEW_SWITCH])
+                                printf("    Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
+                            else
+                                cnt_punct++;
+                            }
+                        }
+                for (i = 0; *noperiod[i]; i++)
+                    if (!strcmp(inword, noperiod[i])) {
+                        if (*s == '.' || *s == '!') {
+                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                            if (!pswit[OVERVIEW_SWITCH])
+                                printf("    Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
+                            else
+                                cnt_punct++;
+                            }
+                        }
+                }
+            }
+
+
+
+        /* Check for commonly mistyped words, and digits like 0 for O in a word */
+        for (s = aline; *s;) {
+            wordstart = s;
+            s = getaword(s, inword);
+            if (!*inword) continue; /* don't bother with empty lines */
+            if (mixdigit(inword)) {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column %ld - Query digit in %s\n", linecnt, (int)(wordstart - aline) + 1, inword);
+                else
+                    cnt_word++;
+                }
+
+            /* put the word through a series of tests for likely typos and OCR errors */
+            /* V.21 I had allowed lots of typo-checking even with the typo switch     */
+            /* turned off, but I really should disallow reporting of them when        */
+            /* the switch is off. Hence the "if" below.                               */
+            if (pswit[TYPO_SWITCH]) {
+                istypo = 0;
+                strcpy(testword, inword);
+                alower = 0;
+                for (i = 0; i < (signed int)strlen(testword); i++) { /* lowercase for testing */
+                    if (testword[i] >= 'a' && testword[i] <= 'z') alower = 1;
+                    if (alower && testword[i] >= 'A' && testword[i] <= 'Z') {
+                        /* we have an uppercase mid-word. However, there are common cases: */
+                        /*   Mac and Mc like McGill                                        */
+                        /*   French contractions like l'Abbe                               */
+                        if ((i == 2 && testword[0] == 'm' && testword[1] == 'c') ||
+                            (i == 3 && testword[0] == 'm' && testword[1] == 'a' && testword[2] == 'c') ||
+                            (i > 0 && testword[i-1] == CHAR_SQUOTE))
+                                ; /* do nothing! */
+
+                        else {  /* V.97 - remove separate case of uppercase within word so that         */
+                                /* names like VanAllen fall into qword_index and get reported only once */
+                            istypo = 1;
+                            }
+                        }
+                    testword[i] = (char)tolower(testword[i]);
+                    }
+
+                /* check for certain unlikely two-letter combinations at word start and end */
+                /* V.0.97 - this replaces individual hardcoded checks in previous versions */
+                if (strlen(testword) > 1) {
+                    for (i = 0; *nostart[i]; i++)
+                        if (!strncmp(testword, nostart[i], 2))
+                            istypo = 1;
+                    for (i = 0; *noend[i]; i++)
+                        if (!strncmp(testword + strlen(testword) -2, noend[i], 2))
+                            istypo = 1;
+                    }
+
+
+                /* ght is common, gbt never. Like that. */
+                if (strstr(testword, "cb")) istypo = 1;
+                if (strstr(testword, "gbt")) istypo = 1;
+                if (strstr(testword, "pbt")) istypo = 1;
+                if (strstr(testword, "tbs")) istypo = 1;
+                if (strstr(testword, "mrn")) istypo = 1;
+                if (strstr(testword, "ahle")) istypo = 1;
+                if (strstr(testword, "ihle")) istypo = 1;
+
+                /* "TBE" does happen - like HEARTBEAT - but uncommon.                    */
+                /*  Also "TBI" - frostbite, outbid - but uncommon.                       */
+                /*  Similarly "ii" like Hawaii, or Pompeii, and in Roman numerals,       */
+                /*  but these are covered in V.20. "ii" is a common scanno.              */
+                if (strstr(testword, "tbi")) istypo = 1;
+                if (strstr(testword, "tbe")) istypo = 1;
+                if (strstr(testword, "ii")) istypo = 1;
+
+                /* check for no vowels or no consonants. */
+                /* If none, flag a typo                  */
+                if (!istypo && strlen(testword)>1) {
+                    vowel = consonant = 0;
+                    for (i = 0; testword[i]; i++)
+                        if (testword[i] == 'y' || gcisdigit(testword[i])) {  /* Yah, this is loose. */
+                            vowel++;
+                            consonant++;
+                            }
+                        else
+                            if  (strchr(vowels, testword[i])) vowel++;
+                            else consonant++;
+                    if (!vowel || !consonant) {
+                        istypo = 1;
+                        }
+                    }
+
+                /* now exclude the word from being reported if it's in */
+                /* the okword list                                     */
+                for (i = 0; *okword[i]; i++)
+                    if (!strcmp(testword, okword[i]))
+                        istypo = 0;
+
+                /* what looks like a typo may be a Roman numeral. Exclude these */
+                if (istypo)
+                    if (isroman(testword))
+                        istypo = 0;
+
+                /* check the manual list of typos */
+                if (!istypo)
+                    for (i = 0; *typo[i]; i++)
+                        if (!strcmp(testword, typo[i]))
+                            istypo = 1;
+
+
+                /* V.21 - check lowercase s and l - special cases */
+                /* V.98 - added "i" and "m"                       */
+                /* V.99 - added "j" often a semi-colon gone wrong */
+                /*      - and "d" for a missing apostrophe - he d */
+                /*      - and "n" for "in"                        */
+                if (!istypo && strlen(testword) == 1)
+                    if (strchr("slmijdn", *inword))
+                        istypo = 1;
+
+
+                if (istypo) {
+                    isdup = 0;
+                    if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
+                        for (i = 0; i < qword_index; i++)
+                            if (!strcmp(testword, qword[i])) {
+                                isdup = 1;
+                                ++dupcnt[i];
+                                }
+                    if (!isdup) {
+                        if (qword_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
+                            strcpy(qword[qword_index], testword);
+                            qword_index++;
+                            }
+                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                        if (!pswit[OVERVIEW_SWITCH]) {
+                            printf("    Line %ld column %d - Query word %s", linecnt, (int)(wordstart - aline) + 1, inword);
+                            if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
+                                printf(" - not reporting duplicates");
+                            printf("\n");
+                            }
+                        else
+                            cnt_word++;
+                        }
+                    }
+                }        /* end of typo-checking */
+
+                /* check the user's list of typos */
+                if (!istypo)
+                    if (usertypo_count)
+                        for (i = 0; i < usertypo_count; i++)
+                            if (!strcmp(testword, usertypo[i])) {
+                                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                                if (!pswit[OVERVIEW_SWITCH])  
+                                    printf("    Line %ld column %d - Query possible scanno %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
+                                }
+
+
+
+            if (pswit[PARANOID_SWITCH] && warn_digit) {   /* in paranoid mode, query all 0 and 1 standing alone - added warn_digit V.97*/
+                if (!strcmp(inword, "0") || !strcmp(inword, "1")) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Query standalone %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
+                    else
+                        cnt_word++;
+                    }
+                }
+            }
+
+        /* look for added or missing spaces around punctuation and quotes */
+        /* If there is a punctuation character like ! with no space on    */
+        /* either side, suspect a missing!space. If there are spaces on   */
+        /* both sides , assume a typo. If we see a double quote with no   */
+        /* space or punctuation on either side of it, assume unspaced     */
+        /* quotes "like"this.                                             */
+        llen = strlen(aline);
+        for (i = 1; i < llen; i++) {                               /* for each character in the line after the first */
+            if  (strchr(".?!,;:_", aline[i])) {                    /* if it's punctuation */
+                isacro = 0;                       /* we need to suppress warnings for acronyms like M.D. */
+                isellipsis = 0;                   /* we need to suppress warnings for ellipsis . . . */
+                if ( (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) ||     /* if there are letters on both sides of it or ... */
+                   (gcisalpha(aline[i+1]) && strchr("?!,;:", aline[i]))) { /* ...if it's strict punctuation followed by an alpha */
+                    if (aline[i] == '.') {
+                        if (i > 2)
+                            if (aline[i-2] == '.') isacro = 1;
+                        if (i + 2 < llen)
+                            if (aline[i+2] == '.') isacro = 1;
+                        }
+                    if (!isacro) {
+                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                        if (!pswit[OVERVIEW_SWITCH])
+                            printf("    Line %ld column %d - Missing space?\n", linecnt, i+1);
+                        else
+                            cnt_punct++;
+                        }
+                    }
+                if (aline[i-1] == CHAR_SPACE && (aline[i+1] == CHAR_SPACE || aline[i+1] == 0)) { /* if there are spaces on both sides, or space before and end of line */
+                    if (aline[i] == '.') {
+                        if (i > 2)
+                            if (aline[i-2] == '.') isellipsis = 1;
+                        if (i + 2 < llen)
+                            if (aline[i+2] == '.') isellipsis = 1;
+                        }
+                    if (!isemptyline && !isellipsis) {
+                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                        if (!pswit[OVERVIEW_SWITCH])
+                            printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
+                        else
+                            cnt_punct++;
+                        }
+                    }
+                }
+            }
+
+        /* 0.98 -- split out the characters that CANNOT be preceded by space */
+        llen = strlen(aline);
+        for (i = 1; i < llen; i++) {                             /* for each character in the line after the first */
+            if  (strchr("?!,;:", aline[i])) {                    /* if it's punctuation that _cannot_ have a space before it */
+                if (aline[i-1] == CHAR_SPACE && !isemptyline && aline[i+1] != CHAR_SPACE) { /* if aline[i+1) DOES == space, it was already reported just above */
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
+                    else
+                        cnt_punct++;
+                    }
+                }
+            }
+
+
+        /* 0.99 -- special case " .X" where X is any alpha. */
+        /* This plugs a hole in the acronym code above. Inelegant, but maintainable. */
+        llen = strlen(aline);
+        for (i = 1; i < llen; i++) {             /* for each character in the line after the first */
+            if  (aline[i] == '.') {              /* if it's a period */
+                if (aline[i-1] == CHAR_SPACE && gcisalpha(aline[i+1])) { /* if the period follows a space and is followed by a letter */
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
+                    else
+                        cnt_punct++;
+                    }
+                }
+            }
+
+
+
+
+        /* v.21 breaking out the search for unspaced doublequotes        */
+        /* This is not as efficient, but it's more maintainable          */
+        /* V.97 added underscore to the list of characters not to query, */
+        /* since underscores are commonly used as italics indicators.    */
+        /* V.98 Added slash as well, same reason.                        */
+        for (i = 1; i < llen; i++) {                               /* for each character in the line after the first */
+            if (aline[i] == CHAR_DQUOTE) {
+                if ((!strchr(" _-.'`,;:!/([{?}])",  aline[i-1]) &&
+                     !strchr(" _-.'`,;:!/([{?}])",  aline[i+1]) &&
+                     aline[i+1] != 0
+                     || (!strchr(" _-([{'`", aline[i-1]) && gcisalpha(aline[i+1])))) {
+                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                        if (!pswit[OVERVIEW_SWITCH])
+                            printf("    Line %ld column %d - Unspaced quotes?\n", linecnt, i+1);
+                        else
+                            cnt_punct++;
+                        }
+                }
+            }
+
+
+        /* v.98 check parity of quotes                             */
+        /* v.99 added !*(s+1) in some tests to catch "I am," he said, but I will not be soon". */
+        for (s = aline; *s; s++) {
+            if (*s == CHAR_DQUOTE) {
+                if (!(dquotepar = !dquotepar)) {    /* parity even */
+                    if (!strchr("_-.'`/,;:!?)]} ",  *(s+1))) {
+                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                        if (!pswit[OVERVIEW_SWITCH])
+                            printf("    Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
+                        else
+                            cnt_punct++;
+                        }
+                    }
+                else {                              /* parity odd */
+                    if (!gcisalpha(*(s+1)) && !isdigit(*(s+1)) && !strchr("_-/.'`([{$",  *(s+1)) || !*(s+1)) {
+                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                        if (!pswit[OVERVIEW_SWITCH])
+                            printf("    Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
+                        else
+                            cnt_punct++;
+                        }
+                    }
+                }
+            }
+
+            if (*aline == CHAR_DQUOTE) {
+                if (strchr(",;:!?)]} ", aline[1])) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column 1 - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
+                    else
+                        cnt_punct++;
+                    }
+                }
+
+        if (pswit[SQUOTE_SWITCH])
+            for (s = aline; *s; s++) {
+                if ((*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
+                     && ( s == aline || (s > aline && !gcisalpha(*(s-1))) || !gcisalpha(*(s+1)))) {
+                    if (!(squotepar = !squotepar)) {    /* parity even */
+                        if (!strchr("_-.'`/\",;:!?)]} ",  *(s+1))) {
+                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                            if (!pswit[OVERVIEW_SWITCH])
+                                printf("    Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
+                            else
+                                cnt_punct++;
+                            }
+                        }
+                    else {                              /* parity odd */
+                        if (!gcisalpha(*(s+1)) && !isdigit(*(s+1)) && !strchr("_-/\".'`",  *(s+1)) || !*(s+1)) {
+                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                            if (!pswit[OVERVIEW_SWITCH])
+                                printf("    Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
+                            else
+                                cnt_punct++;
+                            }
+                        }
+                    }
+                }
+                    
+
+        /* v.20 also look for double punctuation like ,. or ,,     */
+        /* Thanks to DW for the suggestion!                        */
+        /* I'm putting this in a separate loop for clarity         */
+        /* In books with references, ".," and ".;" are common      */
+        /* e.g. "etc., etc.," and vol. 1.; vol 3.;                 */
+        /* OTOH, from my initial tests, there are also fairly      */
+        /* common errors. What to do? Make these cases paranoid?   */
+        /* V.21 ".," is the most common, so invented warn_dotcomma */
+        /* to suppress detailed reporting if it occurs often       */
+        llen = strlen(aline);
+        for (i = 0; i < llen; i++)                  /* for each character in the line */
+            if (strchr(".?!,;:", aline[i])          /* if it's punctuation */
+            && (strchr(".?!,;:", aline[i+1]))
+            && aline[i] && aline[i+1])      /* followed by punctuation, it's a query, unless . . . */
+                if (
+                  (aline[i] == aline[i+1]
+                  && (aline[i] == '.' || aline[i] == '?' || aline[i] == '!'))
+                  || (!warn_dotcomma && aline[i] == '.' && aline[i+1] == ',')
+                  || (isFrench && !strncmp(aline+i, ",...", 4))
+                  || (isFrench && !strncmp(aline+i, "...,", 4))
+                  || (isFrench && !strncmp(aline+i, ";...", 4))
+                  || (isFrench && !strncmp(aline+i, "...;", 4))
+                  || (isFrench && !strncmp(aline+i, ":...", 4))
+                  || (isFrench && !strncmp(aline+i, "...:", 4))
+                  || (isFrench && !strncmp(aline+i, "!...", 4))
+                  || (isFrench && !strncmp(aline+i, "...!", 4))
+                  || (isFrench && !strncmp(aline+i, "?...", 4))
+                  || (isFrench && !strncmp(aline+i, "...?", 4))
+                ) {
+                if ((isFrench && !strncmp(aline+i, ",...", 4))    /* could this BE any more awkward? */
+                  || (isFrench && !strncmp(aline+i, "...,", 4))
+                  || (isFrench && !strncmp(aline+i, ";...", 4))
+                  || (isFrench && !strncmp(aline+i, "...;", 4))
+                  || (isFrench && !strncmp(aline+i, ":...", 4))
+                  || (isFrench && !strncmp(aline+i, "...:", 4))
+                  || (isFrench && !strncmp(aline+i, "!...", 4))
+                  || (isFrench && !strncmp(aline+i, "...!", 4))
+                  || (isFrench && !strncmp(aline+i, "?...", 4))
+                  || (isFrench && !strncmp(aline+i, "...?", 4)))
+                    i +=4;
+                        ; /* do nothing for .. !! and ?? which can be legit */
+                    }
+                else {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Double punctuation?\n", linecnt, i+1);
+                    else
+                        cnt_punct++;
+                    }
+
+        /* v.21 breaking out the search for spaced doublequotes */
+        /* This is not as efficient, but it's more maintainable */
+        s = aline;
+        while (strstr(s," \" ")) {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column %d - Spaced doublequote?\n", linecnt, (int)(strstr(s," \" ")-aline+1));
+            else
+                cnt_punct++;
+            s = strstr(s," \" ") + 2;
+            }
+
+        /* v.20 also look for spaced singlequotes ' and `  */
+        s = aline;
+        while (strstr(s," ' ")) {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ' ")-aline+1));
+            else
+                cnt_punct++;
+            s = strstr(s," ' ") + 2;
+            }
+
+        s = aline;
+        while (strstr(s," ` ")) {
+            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+            if (!pswit[OVERVIEW_SWITCH])
+                printf("    Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ` ")-aline+1));
+            else
+                cnt_punct++;
+            s = strstr(s," ` ") + 2;
+            }
+
+        /* v.99 check special case of 'S instead of 's at end of word */
+        s = aline + 1;
+        while (*s) {
+            if (*s == CHAR_SQUOTE && *(s+1) == 'S' && *(s-1)>='a' && *(s-1)<='z')  {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column %d - Capital \"S\"?\n", linecnt, (int)(s-aline+2));
+                else
+                    cnt_punct++;
+                }
+            s++;
+            }
+
+
+        /* v.21 Now check special cases - start and end of line - */
+        /* for single and double quotes. Start is sometimes [sic] */
+        /* but better to query it anyway.                         */
+        /* While I'm here, check for dash at end of line          */
+        llen = strlen(aline);
+        if (llen > 1) {
+            if (aline[llen-1] == CHAR_DQUOTE ||
+                aline[llen-1] == CHAR_SQUOTE ||
+                aline[llen-1] == CHAR_OPEN_SQUOTE)
+                if (aline[llen-2] == CHAR_SPACE) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Spaced quote?\n", linecnt, llen);
+                    else
+                        cnt_punct++;
+                    }
+            
+            /* V 0.98 removed aline[0] == CHAR_DQUOTE from the test below, since */
+            /* Wrongspaced quotes test also catches it for "                     */
+            if (aline[0] == CHAR_SQUOTE ||
+                aline[0] == CHAR_OPEN_SQUOTE)
+                if (aline[1] == CHAR_SPACE) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column 1 - Spaced quote?\n", linecnt);
+                    else
+                        cnt_punct++;
+                    }
+            /* dash at end of line may well be legit - paranoid mode only */
+            /* and don't report em-dash at line-end                       */
+            if (pswit[PARANOID_SWITCH] && warn_hyphen) {
+                for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
+                if (aline[i] == '-' && aline[i-1] != '-') {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld column %d - Hyphen at end of line?\n", linecnt, i);
+                    }
+                }
+            }
+
+        /* v.21 also look for brackets surrounded by alpha                    */
+        /* Brackets are often unspaced, but shouldn't be surrounded by alpha. */
+        /* If so, suspect a scanno like "a]most"                              */
+        llen = strlen(aline);
+        for (i = 1; i < llen-1; i++) {           /* for each character in the line except 1st & last*/
+            if (strchr("{[()]}", aline[i])         /* if it's a bracket */
+                && gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column %d - Unspaced bracket?\n", linecnt, i);
+                else
+                    cnt_punct++;
+                }
+            }
+        /* The "Cinderella" case, back in again! :-S Give it another shot */
+        if (warn_endquote) {
+            llen = strlen(aline);
+            for (i = 1; i < llen; i++) {           /* for each character in the line except 1st */
+                if (aline[i] == CHAR_DQUOTE)
+                    if (isalpha(aline[i-1])) {
+                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                        if (!pswit[OVERVIEW_SWITCH])
+                            printf("    Line %ld column %d - endquote missing punctuation?\n", linecnt, i);
+                        else
+                            cnt_punct++;
+                        }
+                }
+            }
+
+        llen = strlen(aline);
+
+        /* Check for <HTML TAG> */
+        /* If there is a < in the line, followed at some point  */
+        /* by a > then we suspect HTML                          */
+        if (strstr(aline, "<") && strstr(aline, ">")) {
+            i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
+            if (i > 0) {
+                strncpy(wrk, strstr(aline, "<"), i);
+                wrk[i] = 0;
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column %d - HTML Tag? %s \n", linecnt, (int)(strstr(aline, "<") - aline) + 1, wrk);
+                else
+                    cnt_html++;
+                }
+            }
+
+        /* Check for &symbol; HTML                   */
+        /* If there is a & in the line, followed at  */
+        /* some point by a ; then we suspect HTML    */
+        if (strstr(aline, "&") && strstr(aline, ";")) {
+            i = (int)(strstr(aline, ";") - strstr(aline, "&") + 1);
+            for (s = strstr(aline, "&"); s < strstr(aline, ";"); s++)   
+                if (*s == CHAR_SPACE) i = 0;                /* 0.99 don't report "Jones & Son;" */
+            if (i > 0) {
+                strncpy(wrk, strstr(aline,"&"), i);
+                wrk[i] = 0;
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column %d - HTML symbol? %s \n", linecnt, (int)(strstr(aline, "&") - aline) + 1, wrk);
+                else
+                    cnt_html++;
+                }
+            }
+
+        /* At end of paragraph, check for mismatched quotes.           */
+        /* We don't want to report an error immediately, since it is a */
+        /* common convention to omit the quotes at end of paragraph if */
+        /* the next paragraph is a continuation of the same speaker.   */
+        /* Where this is the case, the next para should begin with a   */
+        /* quote, so we store the warning message and only display it  */
+        /* at the top of the next iteration if the new para doesn't    */
+        /* start with a quote.                                         */
+        /* The -p switch overrides this default, and warns of unclosed */
+        /* quotes on _every_ paragraph, whether the next begins with a */
+        /* quote or not.                                               */
+        /* Version .16 - only report mismatched single quotes if       */
+        /* an open_single_quotes was found.                            */
+
+        if (isemptyline) {          /* end of para - add up the totals */
+            if (quot % 2)
+                sprintf(dquote_err, "    Line %ld - Mismatched quotes\n", linecnt);
+            if (pswit[SQUOTE_SWITCH] && open_single_quote && (open_single_quote != close_single_quote) )
+                sprintf(squote_err,"    Line %ld - Mismatched singlequotes?\n", linecnt);
+            if (pswit[SQUOTE_SWITCH] && open_single_quote
+                                     && (open_single_quote != close_single_quote)
+                                     && (open_single_quote != close_single_quote +1) )
+                squot = 1;    /* flag it to be noted regardless of the first char of the next para */
+            if (r_brack)
+                sprintf(rbrack_err, "    Line %ld - Mismatched round brackets?\n", linecnt);
+            if (s_brack)
+                sprintf(sbrack_err, "    Line %ld - Mismatched square brackets?\n", linecnt);
+            if (c_brack)
+                sprintf(cbrack_err, "    Line %ld - Mismatched curly brackets?\n", linecnt);
+            if (c_unders % 2)
+                sprintf(unders_err, "    Line %ld - Mismatched underscores?\n", linecnt);
+            quot = s_brack = c_brack = r_brack = c_unders =
+                open_single_quote = close_single_quote = 0;
+            isnewpara = 1;     /* let the next iteration know that it's starting a new para */
+            }
+
+        /* V.21 _ALSO_ at end of paragraph, check for omitted punctuation. */
+        /*      by working back through prevline. DW.                      */
+        /* Hmmm. Need to check this only for "normal" paras.               */
+        /* So what is a "normal" para? ouch!                               */
+        /* Not normal if one-liner (chapter headings, etc.)                */
+        /* Not normal if doesn't contain at least one locase letter        */
+        /* Not normal if starts with space                                 */
+
+        /* 0.99 tighten up on para end checks. Disallow comma and */
+        /* semi-colon. Check for legit para end before quotes.    */
+        if (isemptyline) {          /* end of para */
+            for (s = prevline, i = 0; *s && !i; s++)
+                if (gcisletter(*s))
+                    i = 1;    /* use i to indicate the presence of a letter on the line */
+            /* This next "if" is a problem.                                             */
+            /* If I say "start_para_line <= linecnt - 1", that includes one-line        */
+            /* "paragraphs" like chapter heads. Lotsa false positives.                  */
+            /* If I say "start_para_line < linecnt - 1" it doesn't, but then it         */
+            /* misses genuine one-line paragraphs.                                      */
+            /* So what do I do? */
+            if (i
+                && lastblen > 2
+                && start_para_line < linecnt - 1
+                && *prevline > CHAR_SPACE
+                ) {
+                for (i = strlen(prevline)-1; (prevline[i] == CHAR_DQUOTE || prevline[i] == CHAR_SQUOTE) && prevline[i] > CHAR_SPACE && i > 0; i--);
+                for (  ; i > 0; i--) {
+                    if (gcisalpha(prevline[i])) {
+                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
+                        if (!pswit[OVERVIEW_SWITCH])
+                            printf("    Line %ld column %d - No punctuation at para end?\n", linecnt-1, strlen(prevline));
+                        else
+                            cnt_punct++;
+                        break;
+                        }
+                    if (strchr("-.:!([{?}])", prevline[i]))
+                        break;
+                    }
+                }
+            }
+        strcpy(prevline, aline);
+    }
+    fclose (infile);
+    if (!pswit[OVERVIEW_SWITCH])
+        for (i = 0; i < MAX_QWORD; i++)
+            if (dupcnt[i])
+                printf("\nNote: Queried word %s was duplicated %d time%s\n", qword[i], dupcnt[i], "s");
+}
+
+
+
+/* flgets - read one line from thefile into theline (up to maxlen chars).  */
+/* With pswit[LINE_END_SWITCH] set, queries line-ends that are not one     */
+/* CR/LF pair. Returns theline, or NULL as soon as EOF is read.            */
+
+char *flgets(char *theline, int maxlen, FILE *thefile, long lcnt)
+{
+    char c;
+    int len, isCR, cint;
+
+    *theline = 0;
+    len = isCR = 0;
+    c = cint = fgetc(thefile);      /* cint keeps the int result for the EOF test */
+    do {
+        if (cint == EOF)
+            return (NULL);          /* NULL even if part of a line was already stored */
+        if (c == 10)  /* LF: either way, it's end of line */
+            if (isCR)
+                break;
+            else {   /* Error - a LF without a preceding CR */
+                if (pswit[LINE_END_SWITCH]) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld - No CR?\n", lcnt);
+                    else
+                        cnt_lineend++;
+                    }
+                break;
+                }
+        if (c == 13) {              /* CR: remembered in isCR, never stored in theline */
+            if (isCR) { /* Error - two successive CRs */
+                if (pswit[LINE_END_SWITCH]) {
+                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
+                    if (!pswit[OVERVIEW_SWITCH])
+                        printf("    Line %ld - Two successive CRs?\n", lcnt);
+                    else
+                        cnt_lineend++;
+                    }
+                }
+            isCR = 1;
+            }
+        else {                      /* ordinary character: append it and keep theline terminated */
+            if (pswit[LINE_END_SWITCH] && isCR) {
+                if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
+                if (!pswit[OVERVIEW_SWITCH])
+                    printf("    Line %ld column %d - CR without LF?\n", lcnt, len+1);
+                else
+                    cnt_lineend++;
+                }
+             theline[len] = c;
+             len++;
+             theline[len] = 0;
+             isCR = 0;
+             }
+        c = cint = fgetc(thefile);
+    } while(len < maxlen);          /* at len == maxlen the NUL sits at theline[maxlen], so theline needs maxlen+1 bytes */
+    if (pswit[MARKUP_SWITCH])  
+        postprocess_for_HTML(theline);
+    if (pswit[DP_SWITCH])  
+        postprocess_for_DP(theline);
+    return(theline);
+}
+
+
+
+
+/* mixdigit - takes a NUL-terminated "word" and checks whether it    */
+/* contains a mixture of alpha and digits. Generally, this is an     */
+/* error, but may not be for cases like 4th or L5 12s. 3d.           */
+/* Returns 0 if no error found, 1 if the word should be queried.     */
+
+int mixdigit(char *checkword)   /* check for digits like 1 or 0 in words */
+{
+    int wehaveadigit, wehavealetter, firstdigits, query, wl;
+    char *s;
+
+
+    wehaveadigit = wehavealetter = query = 0;
+    for (s = checkword; *s; s++)
+        if (gcisalpha(*s))
+            wehavealetter = 1;
+        else
+            if (gcisdigit(*s))
+                wehaveadigit = 1;
+    if (wehaveadigit && wehavealetter) {         /* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
+        query = 1;
+        wl = strlen(checkword);
+        for (firstdigits = 0; gcisdigit(checkword[firstdigits]); firstdigits++)
+            ;                                    /* firstdigits = length of the leading run of digits */
+        /* digits, ending in st, rd, nd, th of either case.   */
+        /* matchword() returns 1 on a match (not 0, as strcmp */
+        /* would), and each suffix test requires the leading  */
+        /* digits plus the suffix to make up the whole word.  */
+        if (firstdigits + 2 == wl &&
+              (matchword(checkword + wl - 2, "st")
+            || matchword(checkword + wl - 2, "rd")
+            || matchword(checkword + wl - 2, "nd")
+            || matchword(checkword + wl - 2, "th"))
+            )
+                query = 0;
+        if (firstdigits + 3 == wl &&
+              (matchword(checkword + wl - 3, "sts")
+            || matchword(checkword + wl - 3, "rds")
+            || matchword(checkword + wl - 3, "nds")
+            || matchword(checkword + wl - 3, "ths"))
+            )
+                query = 0;
+        if (firstdigits + 4 == wl &&             /* 4-letter suffix (1stly, 3rdly); also guarantees wl - 4 >= 0 */
+              (matchword(checkword + wl - 4, "stly")
+            || matchword(checkword + wl - 4, "rdly")
+            || matchword(checkword + wl - 4, "ndly")
+            || matchword(checkword + wl - 4, "thly"))
+            )
+                query = 0;
+
+        /* digits, ending in l, L, s or d */
+        if (firstdigits + 1 == wl &&
+            (checkword[wl-1] == 'l'
+            || checkword[wl-1] == 'L'
+            || checkword[wl-1] == 's'
+            || checkword[wl-1] == 'd'))
+                query = 0;
+        /* L at the start of a number, representing British pounds, like L500 */
+        /* This is cute. We know the current word is mixeddigit. If the first */
+        /* letter is L, there must be at least one digit following. If both   */
+        /* digits and letters follow, we have a genuine error, else we have a */
+        /* capital L followed by digits, and we accept that as a non-error.   */
+        if (checkword[0] == 'L')
+            if (!mixdigit(checkword+1))
+                query = 0;
+        }
+    return (query);
+}
+
+
+
+
+/* getaword - extracts the first/next "word" from the line, and puts */
+/* it into "thisword". A word is defined as one English word unit    */
+/* -- or at least that's what I'm trying for. At most MAXWORDLEN     */
+/* characters are copied. Returns a pointer to the position in the   */
+/* line where we will start looking for the next word.               */
+
+char *getaword(char *fromline, char *thisword)
+{
+    int i, wordlen;
+    char *s;
+
+    wordlen = 0;
+    for ( ; !gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline ; fromline++ );   /* skip leading non-word characters */
+
+    /* V .20                                                                   */
+    /* add a look-ahead to handle exceptions for numbers like 1,000 and 1.35.  */
+    /* Especially yucky is the case of L1,000                                  */
+    /* I hate this, and I see other ways, but I don't see that any is _better_.*/
+    /* This section looks for a pattern of characters including a digit        */
+    /* followed by a comma or period followed by one or more digits.           */
+    /* If found, it returns this whole pattern as a word; otherwise we discard */
+    /* the results and resume our normal programming.                          */
+    s = fromline;
+    for (  ; (gcisdigit(*s) || gcisalpha(*s) || *s == ',' || *s == '.') && wordlen < MAXWORDLEN ; s++ ) {
+        thisword[wordlen] = *s;
+        wordlen++;
+        }
+    thisword[wordlen] = 0;
+    for (i = 1; i < wordlen -1; i++) {       /* i+1 stays inside the copied word */
+        if (thisword[i] == '.' || thisword[i] == ',') {
+            if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i+1])) {   /* digit on BOTH sides of the separator */
+                fromline = s;
+                return(fromline);
+                }
+            }
+        }
+
+    /* we didn't find a punctuated number - do the regular getword thing */
+    wordlen = 0;
+    for (  ; (gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline == '\'') && wordlen < MAXWORDLEN ; fromline++ ) {
+        thisword[wordlen] = *fromline;
+        wordlen++;
+        }
+    thisword[wordlen] = 0;
+    return(fromline);
+}
+
+
+
+
+
+/* matchword - case-insensitive whole-string comparison.          */
+/* Returns 1 when checkfor and thisword have the same length and  */
+/* agree under toupper(), 0 otherwise (the reverse of strcmp).    */
+
+int matchword(char *checkfor, char *thisword)
+{
+    unsigned int ismatch, i;
+
+    if (strlen(checkfor) != strlen(thisword)) return(0);
+
+    ismatch = 1;     /* assume a match until we find a difference */
+    for (i = 0; i <strlen(checkfor); i++)    /* no early exit: every position is compared */
+        if (toupper(checkfor[i]) != toupper(thisword[i]))   /* NOTE(review): plain char goes to toupper(); 8-bit chars may be negative - confirm */
+            ismatch = 0;
+    return (ismatch);
+}
+
+
+
+
+
+/* lowerit - lowercase the line in place. Only 'A'..'Z' change,  */
+/* so hi-bit characters are left exactly as they were. strlwr    */
+/* is not available on all platforms, and some implementations   */
+/* of tolower might touch hi-bit characters; better safe.        */
+
+void lowerit(char *theline)
+{
+    for ( ; *theline; theline++)
+        if (*theline >='A' && *theline <='Z')
+            *theline += 32;     /* 32 is 'a' - 'A' in ASCII */
+}
+
+
+/* Is this word a Roman Numeral?                                    */
+/* v 0.99 improved to be better. It still doesn't actually          */
+/* validate that the number is a valid Roman Numeral -- for example */
+/* it will pass MXXXXXXXXXX as a valid Roman Numeral, but that's not*/
+/* what we're here to do. If it passes this, it LOOKS like a Roman  */
+/* numeral. Only lower-case letters are matched. In order, allow:   */
+/* any number of m, an optional d, an optional cm, an optional cd,  */
+/* any number of c, an optional xl, an optional xc, an optional l,  */
+/* any number of x, an optional ix, an optional iv, an optional v   */
+/* and any number of i. Returns 1 if the whole word was consumed,   */
+/* 0 otherwise (including for a NULL or empty word).                */
+
+int isroman(char *t)
+{
+    char *s;
+
+    if (!t || !*t) return (0);
+
+    s = t;      /* NOTE(review): s is not read again in this function */
+
+    while (*t == 'm' && *t ) t++;
+    if (*t == 'd') t++;
+    if (*t == 'c' && *(t+1) == 'm') t+=2;
+    if (*t == 'c' && *(t+1) == 'd') t+=2;
+    while (*t == 'c' && *t) t++;
+    if (*t == 'x' && *(t+1) == 'l') t+=2;
+    if (*t == 'x' && *(t+1) == 'c') t+=2;
+    if (*t == 'l') t++;
+    while (*t == 'x' && *t) t++;
+    if (*t == 'i' && *(t+1) == 'x') t+=2;
+    if (*t == 'i' && *(t+1) == 'v') t+=2;
+    if (*t == 'v') t++;
+    while (*t == 'i' && *t) t++;
+    if (!*t) return (1);    /* nothing left over: every character fitted the pattern */
+
+    return(0);
+}
+
+
+
+
+/* gcisalpha is a special version that is somewhat lenient on 8-bit texts.     */
+/* If we use the standard isalpha() function, 8-bit accented characters break  */
+/* words, so that tete with accented characters appears to be two words, "t"   */
+/* and "t", with 8-bit characters between them. This causes over-reporting of  */
+/* errors. Returns 1 for ASCII letters and for the accented-letter codes of    */
+/* CP1252 (Windows) and ISO-8859-1 that are listed below, otherwise 0.         */
+
+int gcisalpha(unsigned char c)
+{
+    if (c >='a' && c <='z') return(1);
+    if (c >='A' && c <='Z') return(1);
+    if (c < 140) return(0);
+    if (c >=192 && c != 208 && c != 215 && c != 222 && c != 240 && c != 247 && c != 254) return(1);   /* 192-255 minus Eth, multiply, Thorn, eth, divide, thorn (ISO-8859-1) */
+    if (c == 140 || c == 142 || c == 156 || c == 158 || c == 159) return (1);   /* CP1252: OE, Z-caron, oe, z-caron, Y-diaeresis */
+    return(0);
+}
+
+/* gcisdigit doesn't get confused in 8-bit texts: returns 1 only for '0'..'9', else 0. */
+int gcisdigit(unsigned char c)
+{   
+    if (c >= '0' && c <='9') return(1);
+    return(0);
+}
+
+/* gcisletter is a special version that doesn't get confused in 8-bit texts.    */
+/* Yeah, we're ISO-8859-1-specific. So sue me. Returns 1 for a letter, else 0.  */
+int gcisletter(unsigned char c)
+{   
+    if ((c >= 'A' && c <='Z') || (c >= 'a' && c <='z') || c >= 192) return(1);   /* every code from 192 up counts, with no exclusions */
+    return(0);
+}
+
+
+
+
+/* gcstrchr wraps strchr to return NULL if the character being searched for is zero (strchr itself would match the terminator) */
+
+char *gcstrchr(char *s, char c)
+{
+    if (c == 0) return(NULL);
+    return(strchr(s,c));
+}
+
+/* postprocess_for_DP is derived from postprocess_for_HTML          */
+/* It is called from flgets() when pswit[DP_SWITCH] is set.         */
+/* It deletes from the line, in place, every occurrence of each     */
+/* string in DPmarkup[], so that the line passed to the main routine*/
+/* has been pre-cleaned of DP markup.                               */
+
+void postprocess_for_DP(char *theline)
+{
+
+    char *s, *t;
+    int i;
+
+    if (!*theline) 
+        return;
+
+    for (i = 0; *DPmarkup[i]; i++) {            /* an empty string ends the DPmarkup[] list */
+        s = strstr(theline, DPmarkup[i]);
+        while (s) {
+            t = s + strlen(DPmarkup[i]);        /* first character after the tag */
+            while (*t) {                        /* slide the rest of the line left over the tag */
+                *s = *t;
+                t++; s++;
+                }
+            *s = 0;
+            s = strstr(theline, DPmarkup[i]);   /* look for another copy of the same tag */
+            }
+        }
+
+}
+
+
+/* postprocess_for_HTML is, at the moment (0.97), a very nasty      */
+/* short-term fix for Charlz. Nasty, nasty, nasty.                  */
+/* It is called from flgets() when pswit[MARKUP_SWITCH] is set.     */
+/* It simply "removes" from the line a hard-coded set of common     */
+/* HTML tags and "replaces" a hard-coded set of common HTML         */
+/* entities, so that the line passed to the main routine has        */
+/* been pre-cleaned of HTML. This is _so_ not the right way to      */
+/* deal with HTML, but what Charlz needs now is not HTML handling   */
+/* proper: just ignoring <i> tags and some others.                  */
+/* To be revisited in future releases!                              */
+
+void postprocess_for_HTML(char *theline)
+{
+
+    if (strstr(theline, "<") && strstr(theline, ">"))
+        while (losemarkup(theline))     /* repeat until losemarkup() returns NULL */
+            ;
+    while (loseentities(theline))       /* repeat until loseentities() returns NULL */
+        ;
+}
+
+char *losemarkup(char *theline)
+{
+    char *s, *t;
+    int i;
+    /* Deletes the first <...> of theline if it begins like an entry of markup[]; returns NULL when nothing was removed. */
+    if (!*theline) 
+        return(NULL);
+
+    s = strstr(theline, "<");
+    t = s ? strstr(s, ">") : NULL;       /* the closing '>' must come after the '<' */
+    if (!s || !t) return(NULL);
+    for (i = 0; *markup[i]; i++)         /* an empty string ends the markup[] list */
+        if (!tagcomp(s+1, markup[i])) {
+            if (!*(t+1)) {               /* the tag ends the line: just cut the line at '<' */
+                *s = 0;
+                return(s);
+                }
+            else
+                if (t > s) {
+                    memmove(s, t+1, strlen(t+1) + 1);   /* source and destination overlap, so strcpy() would be undefined */
+                    return(s);
+                    }
+        }
+    /* it's an unrecognized <xxx> */
+    return(NULL);
+}
+
+char *loseentities(char *theline)    /* replaces one known HTML entity in theline with its text form; NULL if none found or malloc fails */
+{
+    int i;
+    char *s, *t;
+
+    if (!*theline) 
+        return(NULL);
+
+    for (i = 0; *entities[i].htmlent; i++) {    /* named forms; an empty htmlent ends the table */
+        s = strstr(theline, entities[i].htmlent);
+        if (s) {
+            t = malloc((size_t)strlen(s));      /* the tail after the (non-empty) entity is shorter than s, so it fits with its NUL */
+            if (!t) return(NULL);
+            strcpy(t, s + strlen(entities[i].htmlent));
+            strcpy(s, entities[i].textent);     /* NOTE(review): written in place; assumes textent is no longer than the entity or that theline has room - confirm */
+            strcat(s, t);
+            free(t);
+            return(theline);
+            }
+        }
+
+    /* V0.97 Duh. Forgot to check the htmlnum member */
+    for (i = 0; *entities[i].htmlnum; i++) {    /* same replacement, keyed on the htmlnum member */
+        s = strstr(theline, entities[i].htmlnum);
+        if (s) {
+            t = malloc((size_t)strlen(s));
+            if (!t) return(NULL);
+            strcpy(t, s + strlen(entities[i].htmlnum));
+            strcpy(s, entities[i].textent);
+            strcat(s, t);
+            free(t);
+            return(theline);
+            }
+        }
+    return(NULL);
+}
+
+
+int tagcomp(char *strin, char *basetag)    /* returns 1 at the first case-insensitive mismatch, 0 if either string ends first */
+{
+    char *s, *t;
+
+    s = basetag;
+    t  = strin;
+    if (*t == '/') t++; /* ignore a slash */
+    while (*s && *t) {
+        if (tolower(*s) != tolower(*t)) return(1);
+        s++; t++;
+        }
+    /* OK, we have < followed by a valid tag start  */
+    /* should I do something about length?          */
+    /* this is messy. The length of an <i> tag is   */
+    /* limited, but a <table> could go on for miles */
+    /* so I'd have to parse the tags . . . ugh.     */
+    /* It isn't what Charlz needs now, so mark it   */
+    /* as 'pending'.                                */
+    return(0);
+}
+
+void proghelp()          /* print version, licence and usage notes on stderr */
+{
+    fputs("V. 0.991. Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
+    fputs("Gutcheck comes with ABSOLUTELY NO WARRANTY. For details, read the file COPYING.\n", stderr);
+    fputs("This is Free Software; you may redistribute it under certain conditions (GPL);\n", stderr);
+    fputs("read the file COPYING for details.\n\n", stderr);
+    fputs("Usage is: gutcheck [-setpxloyhud] filename\n",stderr);
+    fputs("  where -s checks single quotes, -e suppresses echoing lines, -t checks typos\n",stderr);
+    fputs("  -x (paranoid) switches OFF -t and extra checks, -l turns OFF line-end checks\n",stderr);
+    fputs("  -o just displays overview without detail, -h echoes header fields\n",stderr);
+    fputs("  -v (verbose) unsuppresses duplicate reporting, -m suppresses markup\n",stderr);
+    fputs("  -d ignores DP-specific markup,\n",stderr);
+    fputs("  -u uses a file gutcheck.typ to query user-defined possible typos\n",stderr);
+    fputs("Sample usage: gutcheck warpeace.txt \n",stderr);
+    fputs("\n",stderr);
+    fputs("Gutcheck looks for errors in Project Gutenberg(TM) etexts.\n", stderr);
+    fputs("Gutcheck queries anything it thinks shouldn't be in a PG text; non-ASCII\n",stderr);
+    fputs("characters like accented letters, lines longer than 75 or shorter than 55,\n",stderr);
+    fputs("unbalanced quotes or brackets, a variety of badly formatted punctuation, \n",stderr);
+    fputs("HTML tags, some likely typos. It is NOT a substitute for human judgement.\n",stderr);
+    fputs("\n",stderr);
+}
+
+
+
+/*********************************************************************
+  Revision History:
+
+  04/22/01 Cleaned up some stuff and released .10
+
+           ---------------
+
+  05/09/01 Added the typo list, added two extra cases of he/be error,
+           added -p switch, OPEN_SINGLE QUOTE char as .11
+
+           ---------------
+
+  05/20/01 Increased the typo list,
+           added paranoid mode,
+           ANSIfied the code and added some casts
+              so the compiler wouldn't keep asking if I knew what I was doing,
+           fixed bug in l.s.d. condition (thanks, Dave!),
+           standardized spacing when echoing,
+           added letter-combo checking code to typo section,
+           added more h/b words to typo array.
+           Not too sure about putting letter combos outside of the TYPO conditions -
+           someone is sure to have a book about the tbaka tribe, or something. Anyway, let's see.
+           Released as .12
+
+           ---------------
+
+  06/01/01 Removed duplicate reporting of Tildes, asterisks, etc.
+  06/10/01 Added flgets routine to help with platform-independent
+           detection of invalid line-ends. All PG text files should
+           have CR/LF (13/10) at end of line, regardless of system.
+           Gutcheck now validates this by default. (Thanks, Charles!)
+           Released as .13
+
+           ---------------
+
+  06/11/01 Added parenthesis match checking. (c_brack, cbrack_err etc.)
+           Released as .14
+
+           ---------------
+
+  06/23/01 Fixed: 'No',he said. not being flagged.
+
+           Improved: better single-quotes checking:
+
+           Ignore singlequotes surrounded by alpha, like didn't. (was OK)
+
+           If a singlequote is at the END of a word AND the word ends in "s":
+                  The dogs' tails wagged.
+           it's probably an apostrophe, but less commonly may be a closequote:
+                  "These 'pack dogs' of yours look more like wolves."
+
+           If it's got punctuation before it and is followed by a space
+           or punctuation:
+              . . . was a problem,' he said
+              . . . was a problem,'"
+           it is probably (certainly?) a closequote.
+
+           If it's at start of paragraph, it's probably an openquote.
+              (but watch dialect)
+
+           Words with ' at beginning and end are probably quoted:
+               "You have the word 'chivalry' frequently on your lips."
+               (Not specifically implemented)
+           V.18 I'm glad I didn't implement this, 'cos it jest ain't so
+           where the convention is to punctuate outside the quotes.
+               'Come', he said, 'and join the party'.
+
+           If it is followed by an alpha, and especially a capital:
+              'Hello,' called he.
+           it is either an openquote or dialect.
+
+           Dialect breaks ALL the rules:
+                  A man's a man for a' that.
+                  "Aye, but 'tis all in the pas' now."
+                  "'Tis often the way," he said.
+                  'Ave a drink on me.
+
+           This version looks to be an improvement, and produces
+           fewer false positives, but is still not perfect. The
+           'pack dogs' case still fools it, and dialect is still
+           a problem. Oh, well, it's an improvement, and I have
+           a weighted structure in place for refining guesses at
+           closequotes. Maybe next time, I'll add a bit of logic
+           where if there is an open quote and one that was guessed
+           to be a possessive apostrophe after s, I'll re-guess it
+           to be a closequote. Let's see how this one flies, first.
+
+           (Afterview: it's still crap. Needs much work, and a deeper insight.)
+
+           Released as .15
+
+           TODO: More he/be checks. Can't be perfect - counterexamples:
+              I gave my son good advice: be married regardless of the world's opinion.
+              I gave my son good advice: he married regardless of the world's opinion.
+
+              If by "primitive" be meant "crude", we can understand the sentence.
+              If by "primitive" he meant "crude", we can understand the sentence.
+
+              No matter what be said, I must go on.
+              No matter what he said, I must go on.
+
+              No value, however great, can be set upon them.
+              No value, however great, can he set upon them.
+
+              Real-Life one from a DP International Weekly Miscellany:
+                He wandered through the forest without fear, sleeping
+                much, for in sleep be had companionship--the Great
+                Spirit teaching him what he should know in dreams.
+                That one found by jeebies, and it turned out to be "he".
+
+
+           ---------------
+
+  07/01/01 Added -O option.
+           Improved singlequotes by reporting mismatched single quotes
+           only if an open_single_quotes was found.
+
+           Released as .16
+
+           ---------------
+
+  08/27/01 Added -Y switch for Robert Rowe to allow his app to
+           catch the error output.
+
+           Released as .17
+
+           ---------------
+
+  09/08/01 Added checking Capitals at start of paragraph, but not
+           checking them at start of sentence.
+
+           TODO: Parse sentences out so can check reliably for start of
+                 sentence. Need a whole different approach for that.
+                 (Can't just rely on periods, since they are also
+                 used for abbreviations, etc.)
+
+           Added checking for all vowels or all consonants in a word.
+
+           While I was in, I added "ii" checking and "tl" at start of word.
+
+           Added echoing of first line of paragraph when reporting
+           mismatched quotes or brackets (thanks to David Widger for the
+           suggestion)
+
+           Not querying L at start of a number (used for British pounds).
+
+           The spelling changes are sort of half-done but released anyway
+           Skipped .18 because I had given out a couple of test versions
+           with that number.
+
+  09/25/01 Released as .19
+
+           ---------------
+
+           TODO:
+           Use the logic from my new version of safewrap to stop querying
+             short lines like poems and TOCs.
+           Ignore non-standard ellipses like .  .  . or ...
+
+
+           ---------------
+  10/01/01 Made any line over 80 a VERY long line (was 85).
+           Recognized openquotes on indented paragraphs as continuations
+               of the same speech.
+           Added "cf" to the okword list (how did I forget _that_?) and a few others.
+           Moved abbrev to okword and made it more general.
+           Removed requirement that PG_space_emdash be greater than
+               ten before turning off warnings about spaced dashes.
+           Added period to list of characters that might constitute a separator line.
+           Now checking for double punctuation (Thanks, David!)
+           Now if two spaced em-dashes on a line, reports both. (DW)
+           Bug: Wasn't catching spaced punctuation at line-end since I
+               added flgets in version .13 - fixed.
+           Bug: Wasn't catching spaced singlequotes - fixed
+           Now reads punctuated numbers like 1,000 as a single word.
+               (Used to give "standalone 1" type  queries)
+           Changed paranoid mode - not including s and p options. -ex is now quite usable.
+           Bug: was calling `"For it is perfectly impossible,"    Unspaced Quotes - fixed
+           Bug: Sometimes gave _next_ line number for queried word at end of line - fixed
+
+  10/22/01 Released as .20
+
+           ---------------
+
+           Added count of lines with spaces at end. (cnt_spacend) (Thanks, Brett!)
+           Reduced the number of hi-bit letters needed to stop reporting them
+               from 1/20 to 1/100 or 200 in total.
+           Added PG footer check.
+           Added the -h switch.
+           Fixed platform-specific CHAR_EOL checking for isemptyline - changed to 13 and 10
+           Not reporting ".," when there are many of them, such as a book with many references to "Vol 1., p. 23"
+           Added unspaced brackets check when surrounded by alpha.
+           Removed all typo reporting unless the typo switch is on.
+           Added gcisalpha to ease over-reporting of 8-bit queries.
+           ECHO_SWITCH is now ON by default!
+           PARANOID_SWITCH is now ON by default!
+           Checking for ">From" placed there by e-mail MTA (Thanks Andrew & Greg)
+           Checking for standalone lowercase "l"
+           Checking for standalone lowercase "s"
+           Considering "is be" and "be is" "be was" "was be" as he/be errors
+           Looking at punct at end of para
+
+  01/20/02 Released as .21
+
+           ---------------
+
+           Added VERBOSE_SWITCH to make it list everything. (George Davis)
+
+           ---------------
+
+  02/17/02 Added cint in flgets to try to fix an EOF failure on a compiler I don't have.
+           after which
+           This line caused a coredump on Solaris - fixed.
+                Da sagte die Figur: " Das ist alles gar schoen, und man mag die Puppe
+  03/09/02 Changed header recognition for another header change
+           Called it .24
+  03/29/02 Added qword[][] so I can suppress massive overreporting
+           of queried "words" like "FN", "Wm.", "th'", people's 
+           initials, chemical formulae and suchlike in some texts.
+           Called it .25
+  04/07/02 The qword summary reports at end shouldn't show in OVERVIEW mode. Fixed.
+           Added linecounts in overview mode.
+           Wow! gutcheck gutcheck.exe doesn't report a binary! :-) Need to tighten up. Done.
+           "m" is a not uncommon scanno for "in", but also appears in "a.m." - Can I get round that?
+  07/07/02 Added GPL.
+           Added checking for broken em-dash at line-end (enddash)
+           Released as 0.95
+  08/17/02 Fixed a bug that treated some hi-bit characters as spaces. Thanks, Carlo.
+           Released as 0.96
+  10/10/02 Suppressing some annoying multiple reports by default:
+           Standalone Ones, Asterisks, Square Brackets.
+              Digit 1 occurs often in many scientific texts.
+              Asterisk occurs often in multi-footnoted texts.
+              Mismatch Square Brackets occurs often in multi-para footnotes.
+           Added -m switch for Charlz. Horrible. Nasty. Kludgy. Evil.
+              . . . but it does more or less work for the main cases.
+           Removed uppercase within a word as a separate category so
+           that names like VanAllen get reported only once, like other
+           suspected typos.
+  11/24/02 Fixed - -m switch wasn't looking at htmlnum in
+           loseentities (Thanks, Brett!)
+           Fixed bug which occasionally gave false warning of
+           paragraph starting with lowercase.
+           Added underscore as character not to query around doublequotes.
+           Split the "Non-ASCII" message into "Non-ASCII" vs. "Non-ISO-8859"
+           . . . this is to help detect things like CP1252 characters.
+           Released as 0.97
+
+  12/01/02 Hacked a simplified version of the "Wrongspaced quotes" out of gutspell,
+           for doublequotes only. Replaces "Spaced quote", since it also covers that
+           case.
+           Added "warn_hyphen" to ease over-reporting of hyphens.
+
+  12/20/02 Added "extra period" checks.
+           Added single character line check
+           Added I" check - is usually an exclam
+           Released as 0.98
+
+  1/5/03   Eeek! Left in a lowerit(argv[0]) at the start before procfile()
+           from when I was looking at ways to identify markup. Refuses to
+           open files for *nix users with upcase in the filenames. Removed.
+           Fixed quickly and released as 0.981
+
+  1/8/03   Added "arid" to the list of typos, slightly against my better
+           judgement, but the DP gang are all excited about it. :-)
+           Added a check for comma followed by capital letter, where
+           a period has OCRed into a comma. (DW). Not sure about this
+           either; we'll see.
+           Compiling for Win32 to allow longfilenames.
+
+  6/1/04   A messy test release for DW to include the "gutcheck.typ"
+           process. And the gutcheck.jee trials. Removed "arid" --
+           it can go in gutcheck.typ
+
+           Added checks for carets ^ and slants / but disabling slant
+           queries if more than 20 of them, because some people use them
+           for /italics/. Slants are commonly mistaken italic "I"s.
+
+           Later: removed gutcheck.jee -- wrote jeebies instead.
+
+Random TODO: 
+           Check brackets more closely, like quotes, so that it becomes
+           easy to find the error in long paragraphs full of brackets.
+
+
+  11/4/04  Assorted cleanup. Fixed case where text started with an
+           unbalanced paragraph.
+
+  1/2/05   Has it really been that long? Added "nocomma", "noperiod" check.
+           Bits and pieces: improved isroman(). Added isletter().
+           Other stuff I never noted before this.
+
+  7/3/05   Stuck in a quick start on DP-markup ignoring 
+           at BillFlis's suggestion.
+
+  1/23/06  Took out nocomma etc if typos are off. Why did I ever leave that in?
+           Don't count footer for dotcomma etc.
+
+
+1       I
+ail     all
+arc     are
+arid    and
+bad     had
+ball    hall
+band    hand
+bar     her
+bat     but
+be      he
+bead    head
+beads   heads
+bear    hear
+bit     hit
+bo      be
+boon    been
+borne   home
+bow     how
+bumbled humbled
+car     ear
+carnage carriage
+carne   came
+cast    east
+cat     cut
+cat     eat
+cheek   check
+clay    day
+coining coming
+comer   corner
+die     she
+docs    does
+ease    case
+fail    fall
+fee     he
+haying  having
+ho      he
+ho      who
+hut     but
+is      as
+lie     he
+lime    time
+loth    10th
+m       in
+modem   modern
+Ms      his
+ray     away
+ray     my
+ringer  finger
+ringers fingers
+rioted  noted
+tho     the
+tie     he
+tie     the
+tier    her
+tight   right
+tile    the
+tiling  thing
+tip     up
+tram    train
+tune    time
+u       "
+wen     well
+yon     you
+
+*********************************************************************/
+
diff -r 000000000000 -r c2f4c0285180 gutcheck/gutcheck.typ.in
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gutcheck/gutcheck.typ.in	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,54 @@
+11
+44
+ms
+ail
+alien
+arc
+arid
+bar
+bat
+bo
+borne
+bow
+bum
+bumbled
+carnage
+carne
+cither
+coining
+comer
+cur
+docs
+eve
+eves
+gaming
+gram
+guru
+hag
+hare
+haying
+ho
+lime
+loth
+m
+modem
+nave
+ringer
+ringers
+riot
+rioted
+signer
+snore
+spam
+tho
+tier
+tile
+tiling
+tram
+tum
+tune
+u
+vas
+wag
+wen
+yon
diff -r 000000000000 -r c2f4c0285180 test/Makefile.am
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,1 @@
+SUBDIRS=harness compatibility .
diff -r 000000000000 -r c2f4c0285180 test/compatibility/Makefile.am
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,7 @@
+TESTS_ENVIRONMENT=GUTCHECK=../../gutcheck/gutcheck ../harness/gc-test
+TESTS=missing-space.tst spaced-punctuation.tst html-tag.tst html-symbol.tst \
+	spaced-doublequote.tst mismatched-quotes.tst he-be.tst digits.tst \
+	extra-period.tst ellipsis.tst short-line.tst abbreviation.tst \
+	example.tst
+
+dist_pkgdata_DATA=$(TESTS)
diff -r 000000000000 -r c2f4c0285180 test/compatibility/abbreviation.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/abbreviation.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,9 @@
+**************** INPUT ****************
+This period is an error.But the periods in a.m. aren't.
+**************** EXPECTED ****************
+
+This period is an error.But the periods in a.m. aren't.
+    Line 1 column 45 - Query word m - not reporting duplicates
+
+This period is an error.But the periods in a.m. aren't.
+    Line 1 column 24 - Missing space?
diff -r 000000000000 -r c2f4c0285180 test/compatibility/digits.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/digits.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,12 @@
+**************** INPUT ****************
+0K--this'11 make you look close1y.
+**************** EXPECTED ****************
+
+0K--this'11 make you look close1y.
+    Line 1 column 1 - Query digit in 0K
+
+0K--this'11 make you look close1y.
+    Line 1 column 3 - Query digit in this'11
+
+0K--this'11 make you look close1y.
+    Line 1 column 26 - Query digit in close1y
diff -r 000000000000 -r c2f4c0285180 test/compatibility/ellipsis.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/ellipsis.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,7 @@
+**************** INPUT ****************
+There are some complications . The extra space left around that
+period was an error . . . but that ellipsis wasn't.
+**************** EXPECTED ****************
+
+There are some complications . The extra space left around that
+    Line 1 column 30 - Spaced punctuation?
diff -r 000000000000 -r c2f4c0285180 test/compatibility/example.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/example.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,87 @@
+**************** INPUT ****************
+They saw him distinctly, as with the naked eye; a word, a turn of
+the pen, or a word unsaid, offered the picture of him in America,
+Japan, China, Australia , nay, the continent of Europe, holding an
+English review of his Maker's grotesques. Vernon seemed a
+sheepish fellow, without stature abroad, glad of a compliment
+, grateful for a dinner, endeavouring sadly to digest all he saw
+and heard. But one was a Patterne; tbe other a Whitford. One had
+genius; the other pottered after him to he a student. One was the
+English gent1eman wherever he went; the other was a new kind of
+thing, nondescript, produced in England of late, and not likely
+to come to much good himself, or do much good to the country.
+
+Vernon's dancing in America was capitally described by Willoughby.
+"Adieu to our cousins!" the latter wrote on his voyage to Japan.
+"I may possibly have had some vogue in their ball-rooms, and in
+showing them an English seat on horseback: 1 must resign myself if
+I have not been popular among them. I could not sing their
+national song--if a congery of states be a nation-- and I must
+confess I listened with frigid politeness to their singing of it.
+A great people, no doubt. Adieu to them. I have had to tear old
+Vernon away. He had serious thoughts of settling, means to
+
+correspond with some of them. On the whole, forgetting two or
+more "traits of insolence~ on the part of his hosts, which he
+cited, Willoughby escaped pretty comfortably. The President had
+been, consciously or not,uncivil, but one knew his origin! Upon
+these interjections, placable flicks of the lionly tail addressed
+to Britannia the Ruler, who expected him in some mildish way to
+lash terga cauda in retiring, Sir WilIoughby Patterne passed from
+a land of alien manners,; and ever after he spoke of America
+respectfully aud pensively, with a tail tucked in, as it were. His
+travels were profitable to himself. The fact is, that tbere are
+cousins who come to greatness and rnust be pacified, or they will
+prove annoying. Heaven forefend a collision between cousins!
+**************** EXPECTED ****************
+
+Japan, China, Australia , nay, the continent of Europe, holding an
+    Line 3 column 25 - Spaced punctuation?
+
+, grateful for a dinner, endeavouring sadly to digest all he saw
+    Line 6 column 1 - Begins with punctuation?
+
+and heard. But one was a Patterne; tbe other a Whitford. One had
+    Line 7 column 34 - Query word tbe - not reporting duplicates
+
+genius; the other pottered after him to he a student. One was the
+    Line 8 column 37 - Query he/be error?
+
+English gent1eman wherever he went; the other was a new kind of
+    Line 9 column 8 - Query digit in gent1eman
+
+showing them an English seat on horseback: 1 must resign myself if
+    Line 16 column 43 - Query standalone 1
+
+national song--if a congery of states be a nation-- and I must
+    Line 18 column 50 - Spaced em-dash?
+
+Vernon away. He had serious thoughts of settling, means to
+    Line 21 column 58 - No punctuation at para end?
+
+Vernon's dancing in America was capitally described by Willoughby.
+    Line 22 - Mismatched quotes
+
+correspond with some of them. On the whole, forgetting two or
+    Line 23 column 1 - Paragraph starts with lower-case
+
+more "traits of insolence~ on the part of his hosts, which he
+    Line 24 column 26 - Tilde character?
+
+been, consciously or not,uncivil, but one knew his origin! Upon
+    Line 26 column 25 - Missing space?
+
+lash terga cauda in retiring, Sir WilIoughby Patterne passed from
+    Line 29 column 34 - Query word WilIoughby - not reporting duplicates
+
+a land of alien manners,; and ever after he spoke of America
+    Line 30 column 24 - Double punctuation?
+
+respectfully aud pensively, with a tail tucked in, as it were. His
+    Line 31 column 13 - Query word aud - not reporting duplicates
+
+travels were profitable to himself. The fact is, that tbere are
+    Line 32 column 54 - Query word tbere - not reporting duplicates
+
+cousins who come to greatness and rnust be pacified, or they will
+    Line 33 column 34 - Query word rnust - not reporting duplicates
diff -r 000000000000 -r c2f4c0285180 test/compatibility/extra-period.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/extra-period.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,6 @@
+**************** INPUT ****************
+"If you do. you'll regret it!"
+**************** EXPECTED ****************
+
+"If you do. you'll regret it!"
+    Line 1 column 11 - Extra period?
diff -r 000000000000 -r c2f4c0285180 test/compatibility/he-be.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/he-be.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,6 @@
+**************** INPUT ****************
+The horse is said to he worth a lot.
+**************** EXPECTED ****************
+
+The horse is said to he worth a lot.
+    Line 1 column 18 - Query he/be error?
diff -r 000000000000 -r c2f4c0285180 test/compatibility/html-symbol.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/html-symbol.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,6 @@
+**************** INPUT ****************
+&So;
+**************** EXPECTED ****************
+
+&So;
+    Line 1 column 1 - HTML symbol? &So; 
diff -r 000000000000 -r c2f4c0285180 test/compatibility/html-tag.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/html-tag.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,6 @@
+**************** INPUT ****************
+<This is a tag>
+**************** EXPECTED ****************
+
+<This is a tag>
+    Line 1 column 1 - HTML Tag? <This is a tag> 
diff -r 000000000000 -r c2f4c0285180 test/compatibility/mismatched-quotes.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/mismatched-quotes.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,8 @@
+**************** INPUT ****************
+Margaret said: "Now you should start for school.
+
+New paragraph.
+**************** EXPECTED ****************
+
+Margaret said: "Now you should start for school.
+    Line 2 - Mismatched quotes
diff -r 000000000000 -r c2f4c0285180 test/compatibility/missing-space.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/missing-space.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,6 @@
+**************** INPUT ****************
+"Look!John, over there!"
+**************** EXPECTED ****************
+
+"Look!John, over there!"
+    Line 1 column 6 - Missing space?
diff -r 000000000000 -r c2f4c0285180 test/compatibility/short-line.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/short-line.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,15 @@
+**************** INPUT ****************
+The second line of a paragraph isn't usually short at all
+and
+should be flagged as a warning by gutcheck as long as there
+are sufficient numbers of lines in the file to stop it deciding
+that there are too many short lines to bother reporting, which
+means that I have to waffle on until we have at least 10 lines
+of text.
+
+The last line of a paragraph
+is usually short.
+**************** EXPECTED ****************
+
+and
+    Line 2 column 3 - Short line 3?
diff -r 000000000000 -r c2f4c0285180 test/compatibility/spaced-doublequote.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/spaced-doublequote.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,9 @@
+**************** INPUT ****************
+Margaret said: " Now you should start for school."
+**************** EXPECTED ****************
+
+Margaret said: " Now you should start for school."
+    Line 1 column 16 - Wrongspaced quotes?
+
+Margaret said: " Now you should start for school."
+    Line 1 column 15 - Spaced doublequote?
diff -r 000000000000 -r c2f4c0285180 test/compatibility/spaced-punctuation.tst
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compatibility/spaced-punctuation.tst	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,6 @@
+**************** INPUT ****************
+"Look! John , over there!"
+**************** EXPECTED ****************
+
+"Look! John , over there!"
+    Line 1 column 13 - Spaced punctuation?
diff -r 000000000000 -r c2f4c0285180 test/harness/Makefile.am
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/harness/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,8 @@
+INCLUDES=-I$(top_srcdir)
+bin_PROGRAMS=gc-test
+AM_CFLAGS=$(GLIB_CFLAGS)
+LIBS=$(GLIB_LIBS)
+
+gc_test_SOURCES=gc-test.c testcase.c testcase.h testcaseio.c testcaseio.h \
+	testcaseparser.c testcaseparser.h
+gc_test_LDADD=../../gclib/libgc.la
diff -r 000000000000 -r c2f4c0285180 test/harness/gc-test.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/harness/gc-test.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,31 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <gclib/gclib.h>
+#include "testcase.h"
+#include "testcaseio.h"
+
+/*
+ * Parse the testcase in filename and run it. Returns FALSE if the file
+ * cannot be parsed or the test fails, TRUE on pass or expected-fail.
+ */
+boolean run_test(const char *filename)
+{
+    boolean passed=FALSE;
+    Testcase *tc=testcase_parse_file(filename);
+    if (tc)
+    {
+	passed=testcase_run(tc);
+	testcase_free(tc);
+    }
+    return passed;
+}
+
+int main(int argc,char **argv)
+{
+    int i=1;
+    boolean pass=TRUE;
+    while(i<argc)	/* run every named test, even once one has failed */
+	pass&=run_test(argv[i++]);
+    return pass?0:1;
+}
diff -r 000000000000 -r c2f4c0285180 test/harness/testcase.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/harness/testcase.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,203 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef WIN32
+#include <io.h>
+#include <fcntl.h>
+#endif
+#include <gclib/gclib.h>
+#include "testcase.h"
+
+#if !HAVE_MKSTEMP
+/*
+ * An insecure implementation of mkstemp(), for those platforms that
+ * don't support it. Retries only on a name collision (EEXIST); any
+ * other open() failure returns -1 with errno describing the error.
+ */
+int mkstemp(char *template)
+{
+    int fd,err;
+    char *s;
+    for(;;)
+    {
+	s=str_dup(template);
+	mktemp(s);
+	if (!*s)
+	{
+	    mem_free(s);
+	    errno=EEXIST;	/* set last so mem_free() cannot clobber it */
+	    return -1;
+	}
+	fd=open(s,O_RDWR|O_CREAT|O_EXCL,0600);
+	err=errno;
+	if (fd>=0)		/* 0 is a valid descriptor too */
+	    strcpy(template,s);
+	mem_free(s);
+	errno=err;		/* report open()'s error, not mem_free()'s */
+	if (fd>=0 || err!=EEXIST)
+	    return fd;
+    }
+}
+#endif	/* !HAVE_MKSTEMP */
+
+/*
+ * Write count bytes from buf to a duplicate of fd, emitting CR NL for each
+ * NL. Returns count, or (size_t)-1 on failure; the caller's fd stays open.
+ */
+static size_t write_text(int fd,const char *buf,size_t count)
+{
+    const char *p=buf;
+    size_t left=count;
+    FILE *stream;
+    int copy=dup(fd);
+    if (copy<0)
+	return -1;
+#ifdef WIN32
+    if (_setmode(copy,_O_BINARY)<0)
+    {
+	close(copy);
+	return -1;
+    }
+#endif
+    stream=fdopen(copy,"wb");
+    if (!stream)
+    {
+	close(copy);
+	return -1;
+    }
+    for(;left;left--,p++)
+    {
+	if (*p=='\n' && putc('\r',stream)==EOF)
+	    break;
+	if (putc(*p,stream)==EOF)
+	    break;
+    }
+    if (left)	/* the loop bailed out early: a putc() failed */
+    {
+	(void)fclose(stream);
+	return -1;
+    }
+    if (fclose(stream))
+	return -1;
+    return count;
+}
+
+/*
+ * Return the length (in bytes) of the longest prefix shared by s1 and s2.
+ */
+size_t common_prefix_length(const char *s1,const char *s2)
+{
+    const char *p;
+    for(p=s1;*p && *p==*s2;p++)
+	s2++;
+    return p-s1;
+}
+
+/*
+ * Run a testcase: write its input (with CR NL line ends) to a temporary
+ * file, run gutcheck ($GUTCHECK, else the one in the current directory)
+ * on it and, when expected text exists, compare the output against it.
+ * Returns TRUE only on a match with exit status 0; FALSE otherwise.
+ */
+boolean testcase_run(Testcase *testcase)
+{
+    boolean r;
+    int fd,exit_status,col;
+    size_t n,offset,header_len;
+    char input[]="TEST-XXXXXX";
+    char *endp,*bol,*output;
+    char *command[3];
+    String *expected,*report;
+    fd=mkstemp(input);
+    if (fd<0)	/* no temporary file could be created */
+    {
+	perror(input);
+	return FALSE;
+    }
+    n=testcase->input?strlen(testcase->input):0;
+    if (n && write_text(fd,testcase->input,n)!=n)
+    {
+	perror(input);
+	close(fd);
+	(void)remove(input);
+	return FALSE;
+    }
+    close(fd);
+    command[0]=getenv("GUTCHECK");
+    if (!command[0])
+	command[0]="." GC_DIR_SEPARATOR_S "gutcheck";
+    command[1]=input;
+    command[2]=NULL;
+    if (testcase->expected)
+	r=spawn_sync(command,&output,&exit_status);
+    else
+    {
+	r=spawn_sync(command,NULL,&exit_status);
+	output=NULL;
+    }
+    (void)remove(input);
+    if (!r)
+	return FALSE;
+    if (testcase->expected)
+    {
+	expected=string_new("\n\nFile: ");
+	string_append(expected,input);
+	string_append(expected,"\n\n\n");
+	header_len=expected->len;	/* length of the "File:" banner */
+	string_append(expected,testcase->expected);
+    }
+    else
+    {
+	expected=NULL;
+	header_len=0;
+    }
+    if (expected && strcmp(output,expected->str))
+    {
+	fprintf(stderr,"%s: FAIL\n",testcase->basename);
+	offset=common_prefix_length(output,expected->str);
+	if (offset==header_len && !output[offset])
+	    fprintf(stderr,"Unexpected zero warnings from gutcheck.\n");
+	else
+	{
+	    /* Show the output up to the end of the first differing line. */
+	    endp=strchr(output+offset,'\n');
+	    if (!endp)
+		endp=output+strlen(output);
+	    report=string_new(NULL);
+	    string_append_len(report,output,endp-output);
+	    bol=strrchr(report->str,'\n');
+	    bol=bol?bol+1:report->str;
+	    col=offset-(bol-report->str);	/* column of first difference */
+	    fprintf(stderr,"Unexpected output from gutcheck:\n");
+	    if (report->len>=header_len)
+		fprintf(stderr,"%s\n%*s^\n",report->str+header_len,col,"");
+	    else
+		fprintf(stderr,"%s\n%*s^\n",report->str,col,"");
+	    string_free(report,TRUE);
+	}
+	string_free(expected,TRUE);
+	mem_free(output);
+	return FALSE;
+    }
+    /* NOTE(review): expected and output may be NULL here - confirm that */
+    /* string_free() and mem_free() both accept NULL. */
+    string_free(expected,TRUE);
+    mem_free(output);
+    if (exit_status)
+	fprintf(stderr,"gutcheck exited with code %d\n",exit_status);
+    else
+	fprintf(stderr,"%s: PASS\n",testcase->basename);
+    return !exit_status;
+}
+
+/*
+ * Release a testcase together with the three strings it owns.
+ */
+void testcase_free(Testcase *testcase)
+{
+    mem_free(testcase->expected);
+    mem_free(testcase->input);
+    mem_free(testcase->basename);
+    mem_free(testcase);
+}
diff -r 000000000000 -r c2f4c0285180 test/harness/testcase.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/harness/testcase.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,16 @@
+#ifndef TESTCASE_H
+#define TESTCASE_H
+
+typedef struct {
+    char *basename;	/* File name less its last extension; used in reports */
+    char *input;	/* Text of the INPUT tag (NULL if there was none) */
+    char *expected;	/* Text of the EXPECTED tag (NULL if there was none) */
+    enum {
+	TESTCASE_XFAIL=1<<0,	/* NOTE(review): presumably "expected failure" */
+    } flags;
+} Testcase;
+
+boolean testcase_run(Testcase *testcase);
+void testcase_free(Testcase *testcase);
+
+#endif	/* TESTCASE_H */
diff -r 000000000000 -r c2f4c0285180 test/harness/testcaseio.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/harness/testcaseio.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,63 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <gclib/gclib.h>
+#include "testcaseparser.h"
+#include "testcaseio.h"
+
+/*
+ * Parse the testcase stored in the file called filename.
+ * Returns NULL on failure; each validity check made here reports its
+ * reason on stderr. Free a successful result with testcase_free().
+ */
+Testcase *testcase_parse_file(const char *filename)
+{
+    Testcase *result=NULL;
+    TestcaseParser *parser=testcase_parser_new_from_file(filename);
+    char *dot;
+    const char *tag,*text;
+    boolean seen_tag=FALSE;
+    if (!parser)
+	return NULL;
+    if (!*testcase_parser_get_flag(parser))
+    {
+	fprintf(stderr,"%s: Not a valid testcase (flag)\n",filename);
+	goto invalid;
+    }
+    result=mem_new0(Testcase,1);
+    result->basename=path_get_basename(filename);
+    /* The testcase is named after the file, less its final extension. */
+    dot=strrchr(result->basename,'.');
+    if (dot)
+	*dot='\0';
+    while(testcase_parser_get_next_tag(parser,&tag,&text))
+    {
+	/* Each known tag may appear at most once. */
+	if (!strcmp(tag,"INPUT") && !result->input)
+	    result->input=str_dup(text);
+	else if (!strcmp(tag,"EXPECTED") && !result->expected)
+	    result->expected=str_dup(text);
+	else
+	{
+	    fprintf(stderr,"%s: Not a valid testcase (%s)\n",filename,tag);
+	    goto invalid;
+	}
+	seen_tag=TRUE;
+    }
+    if (testcase_parser_at_eof(parser))
+    {
+	testcase_parser_free(parser);
+	return result;
+    }
+    if (seen_tag)
+	fprintf(stderr,"%s: Not a valid testcase (garbage at end)\n",
+	  filename);
+    else
+	fprintf(stderr,"%s: Not a valid testcase (no valid tags)\n",
+	  filename);
+invalid:
+    if (result)
+	testcase_free(result);
+    testcase_parser_free(parser);
+    return NULL;
+}
diff -r 000000000000 -r c2f4c0285180 test/harness/testcaseio.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/harness/testcaseio.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,8 @@
+#ifndef TESTCASE_IO_H
+#define TESTCASE_IO_H
+
+#include "testcase.h"
+
+Testcase *testcase_parse_file(const char *filename);
+
+#endif	/* TESTCASE_IO_H */
diff -r 000000000000 -r c2f4c0285180 test/harness/testcaseparser.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/harness/testcaseparser.c	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,115 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <gclib/gclib.h>
+#include "testcaseparser.h"
+
+/*
+ * Return the flag which brackets tags: the leading run of printable,
+ * non-space ASCII characters in the contents (cached after the first call).
+ */
+const char *testcase_parser_get_flag(TestcaseParser *parser)
+{
+    const char *p;
+    if (parser->flag)
+	return parser->flag->str;
+    parser->flag=string_new(NULL);
+    for(p=parser->contents;*p>' ' && *p<='~';p++)
+	string_append_c(parser->flag,*p);
+    return parser->flag->str;
+}
+
+/*
+ * Report whether the parse position is at the NUL which ends the contents.
+ */
+boolean testcase_parser_at_eof(TestcaseParser *parser)
+{
+    return parser->contents[parser->pos]=='\0';
+}
+
+/*
+ * Get the next tag (and its associated text, if any) from a test case.
+ * The strings stored in *tag and *text belong to the parser.
+ * Returns: TRUE if successful and FALSE if no more valid tags are present.
+ * After FALSE, call testcase_parser_at_eof() to tell EOF from invalid text.
+ */
+boolean testcase_parser_get_next_tag(TestcaseParser *parser,const char **tag,
+  const char **text)
+{
+    size_t n;
+    char *bol,*eol,*start,*endp;
+    String *flag,*string;
+    mem_free(parser->tag);
+    parser->tag=NULL;
+    mem_free(parser->tag_text);
+    parser->tag_text=NULL;
+    (void)testcase_parser_get_flag(parser);
+    flag=parser->flag;
+    bol=parser->contents+parser->pos;
+    if (strncmp(bol,flag->str,flag->len))
+	return FALSE;
+    eol=strchr(bol,'\n');
+    if (!eol)
+	return FALSE;
+    start=bol+flag->len;
+    endp=eol-flag->len;
+    /* The closing flag must not overlap the opening one (eg., "***"). */
+    if (endp<start || strncmp(endp,flag->str,flag->len))
+	return FALSE;
+    /* Trim the name, never stepping outside [start,endp). */
+    while(start<endp && isspace((unsigned char)*start))
+	start++;
+    while(endp>start && isspace((unsigned char)endp[-1]))
+	endp--;
+    parser->tag=str_ndup(start,endp-start);
+    parser->pos=eol-parser->contents+1;
+    string=string_new(NULL);
+    while (!testcase_parser_at_eof(parser) &&
+      strncmp(parser->contents+parser->pos,flag->str,flag->len))
+    {
+	bol=parser->contents+parser->pos;
+	eol=strchr(bol,'\n');
+	n=eol?(size_t)(eol-bol)+1:strlen(bol);
+	string_append_len(string,bol,n);
+	parser->pos+=n;
+    }
+    parser->tag_text=string_free(string,FALSE);
+    if (!parser->tag_text)
+	parser->tag_text=str_dup("");
+    if (tag)
+	*tag=parser->tag;
+    if (text)
+	*text=parser->tag_text;
+    return TRUE;
+}
+
+/*
+ * Create a testcase parser over the text contents of a regular file.
+ * Returns NULL if file_get_contents_text() fails.
+ */
+TestcaseParser *testcase_parser_new_from_file(const char *filename)
+{
+    TestcaseParser *parser=mem_new0(TestcaseParser,1);
+    if (file_get_contents_text(filename,&parser->contents,NULL))
+    {
+	parser->filename=str_dup(filename);
+	return parser;
+    }
+    mem_free(parser);
+    return NULL;
+}
+
+/*
+ * Destroy a testcase parser along with every string it holds.
+ */
+void testcase_parser_free(TestcaseParser *parser)
+{
+    mem_free(parser->tag_text);
+    mem_free(parser->tag);
+    if (parser->flag!=NULL)
+	string_free(parser->flag,TRUE);
+    mem_free(parser->contents);
+    mem_free(parser->filename);
+    mem_free(parser);
+}
diff -r 000000000000 -r c2f4c0285180 test/harness/testcaseparser.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/harness/testcaseparser.h	Tue Jan 24 23:54:05 2012 +0000
@@ -0,0 +1,22 @@
+#ifndef TESTCASE_PARSER_H
+#define TESTCASE_PARSER_H
+
+#include <gclib/gclib.h>
+
+typedef struct {
+    char *filename;	/* Copy of the name given to ..._new_from_file() */
+    char *contents;	/* Text read from the file, NUL terminated */
+    String *flag;	/* Flag string; NULL until first asked for */
+    size_t pos;		/* Offset in contents of the next unparsed char */
+    char *tag;		/* Name of the most recently parsed tag, or NULL */
+    char *tag_text;	/* Text which followed that tag, or NULL */
+} TestcaseParser;
+
+const char *testcase_parser_get_flag(TestcaseParser *parser);
+boolean testcase_parser_get_next_tag(TestcaseParser *parser,const char **tag,
+  const char **text);
+boolean testcase_parser_at_eof(TestcaseParser *parser);
+TestcaseParser *testcase_parser_new_from_file(const char *filename);
+void testcase_parser_free(TestcaseParser *parser);
+
+#endif	/* TESTCASE_PARSER_H */