Initial version 1.50
author ali <ali@juiblex.co.uk>
Tue Jan 24 23:54:05 2012 +0000 (2012-01-24)
changeset 0 c2f4c0285180
child 1 707d51fedbe0
Initial version
.hgignore
COPYING
INSTALL
Makefile.am
README
bootstrap.sh
configure.ac
doc/Makefile.am
doc/README-0.99
doc/gc-test.txt
doc/gutcheck.txt
gclib/Makefile.am
gclib/fileutils.c
gclib/fileutils.h
gclib/gclib.h
gclib/gcstring.c
gclib/gcstring.h
gclib/macros.h
gclib/mem.c
gclib/mem.h
gclib/spawn.c
gclib/spawn.h
gclib/strfuncs.c
gclib/strfuncs.h
gclib/textfileutils.c
gclib/textfileutils.h
gclib/types.h
gclib/utils.c
gclib/utils.h
gutcheck/Makefile.am
gutcheck/gutcheck.c
gutcheck/gutcheck.typ.in
test/Makefile.am
test/compatibility/Makefile.am
test/compatibility/abbreviation.tst
test/compatibility/digits.tst
test/compatibility/ellipsis.tst
test/compatibility/example.tst
test/compatibility/extra-period.tst
test/compatibility/he-be.tst
test/compatibility/html-symbol.tst
test/compatibility/html-tag.tst
test/compatibility/mismatched-quotes.tst
test/compatibility/missing-space.tst
test/compatibility/short-line.tst
test/compatibility/spaced-doublequote.tst
test/compatibility/spaced-punctuation.tst
test/harness/Makefile.am
test/harness/gc-test.c
test/harness/testcase.c
test/harness/testcase.h
test/harness/testcaseio.c
test/harness/testcaseio.h
test/harness/testcaseparser.c
test/harness/testcaseparser.h
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/.hgignore	Tue Jan 24 23:54:05 2012 +0000
     1.3 @@ -0,0 +1,22 @@
     1.4 +gutcheck-.*\.tar\.gz
     1.5 +gutcheck-.*/
     1.6 +Makefile$
     1.7 +Makefile\.in
     1.8 +aclocal\.m4
     1.9 +libtool
    1.10 +stamp-h1
    1.11 +autom4te\.cache
    1.12 +config\.log
    1.13 +config\.status
    1.14 +config/
    1.15 +configure
    1.16 +\.deps/
    1.17 +\.libs/
    1.18 +\..*\.swp
    1.19 +.*\.o
    1.20 +.*\.la
    1.21 +.*\.lo
    1.22 +.*\.exe
    1.23 +gutcheck/gutcheck\.typ
    1.24 +gutcheck/gutcheck
    1.25 +test/harness/gc-test
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/COPYING	Tue Jan 24 23:54:05 2012 +0000
     2.3 @@ -0,0 +1,340 @@
     2.4 +		    GNU GENERAL PUBLIC LICENSE
     2.5 +		       Version 2, June 1991
     2.6 +
     2.7 + Copyright (C) 1989, 1991 Free Software Foundation, Inc.
     2.8 +                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     2.9 + Everyone is permitted to copy and distribute verbatim copies
    2.10 + of this license document, but changing it is not allowed.
    2.11 +
    2.12 +			    Preamble
    2.13 +
    2.14 +  The licenses for most software are designed to take away your
    2.15 +freedom to share and change it.  By contrast, the GNU General Public
    2.16 +License is intended to guarantee your freedom to share and change free
    2.17 +software--to make sure the software is free for all its users.  This
    2.18 +General Public License applies to most of the Free Software
    2.19 +Foundation's software and to any other program whose authors commit to
    2.20 +using it.  (Some other Free Software Foundation software is covered by
    2.21 +the GNU Library General Public License instead.)  You can apply it to
    2.22 +your programs, too.
    2.23 +
    2.24 +  When we speak of free software, we are referring to freedom, not
    2.25 +price.  Our General Public Licenses are designed to make sure that you
    2.26 +have the freedom to distribute copies of free software (and charge for
    2.27 +this service if you wish), that you receive source code or can get it
    2.28 +if you want it, that you can change the software or use pieces of it
    2.29 +in new free programs; and that you know you can do these things.
    2.30 +
    2.31 +  To protect your rights, we need to make restrictions that forbid
    2.32 +anyone to deny you these rights or to ask you to surrender the rights.
    2.33 +These restrictions translate to certain responsibilities for you if you
    2.34 +distribute copies of the software, or if you modify it.
    2.35 +
    2.36 +  For example, if you distribute copies of such a program, whether
    2.37 +gratis or for a fee, you must give the recipients all the rights that
    2.38 +you have.  You must make sure that they, too, receive or can get the
    2.39 +source code.  And you must show them these terms so they know their
    2.40 +rights.
    2.41 +
    2.42 +  We protect your rights with two steps: (1) copyright the software, and
    2.43 +(2) offer you this license which gives you legal permission to copy,
    2.44 +distribute and/or modify the software.
    2.45 +
    2.46 +  Also, for each author's protection and ours, we want to make certain
    2.47 +that everyone understands that there is no warranty for this free
    2.48 +software.  If the software is modified by someone else and passed on, we
    2.49 +want its recipients to know that what they have is not the original, so
    2.50 +that any problems introduced by others will not reflect on the original
    2.51 +authors' reputations.
    2.52 +
    2.53 +  Finally, any free program is threatened constantly by software
    2.54 +patents.  We wish to avoid the danger that redistributors of a free
    2.55 +program will individually obtain patent licenses, in effect making the
    2.56 +program proprietary.  To prevent this, we have made it clear that any
    2.57 +patent must be licensed for everyone's free use or not licensed at all.
    2.58 +
    2.59 +  The precise terms and conditions for copying, distribution and
    2.60 +modification follow.
    2.61 +
    2.62 +		    GNU GENERAL PUBLIC LICENSE
    2.63 +   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
    2.64 +
    2.65 +  0. This License applies to any program or other work which contains
    2.66 +a notice placed by the copyright holder saying it may be distributed
    2.67 +under the terms of this General Public License.  The "Program", below,
    2.68 +refers to any such program or work, and a "work based on the Program"
    2.69 +means either the Program or any derivative work under copyright law:
    2.70 +that is to say, a work containing the Program or a portion of it,
    2.71 +either verbatim or with modifications and/or translated into another
    2.72 +language.  (Hereinafter, translation is included without limitation in
    2.73 +the term "modification".)  Each licensee is addressed as "you".
    2.74 +
    2.75 +Activities other than copying, distribution and modification are not
    2.76 +covered by this License; they are outside its scope.  The act of
    2.77 +running the Program is not restricted, and the output from the Program
    2.78 +is covered only if its contents constitute a work based on the
    2.79 +Program (independent of having been made by running the Program).
    2.80 +Whether that is true depends on what the Program does.
    2.81 +
    2.82 +  1. You may copy and distribute verbatim copies of the Program's
    2.83 +source code as you receive it, in any medium, provided that you
    2.84 +conspicuously and appropriately publish on each copy an appropriate
    2.85 +copyright notice and disclaimer of warranty; keep intact all the
    2.86 +notices that refer to this License and to the absence of any warranty;
    2.87 +and give any other recipients of the Program a copy of this License
    2.88 +along with the Program.
    2.89 +
    2.90 +You may charge a fee for the physical act of transferring a copy, and
    2.91 +you may at your option offer warranty protection in exchange for a fee.
    2.92 +
    2.93 +  2. You may modify your copy or copies of the Program or any portion
    2.94 +of it, thus forming a work based on the Program, and copy and
    2.95 +distribute such modifications or work under the terms of Section 1
    2.96 +above, provided that you also meet all of these conditions:
    2.97 +
    2.98 +    a) You must cause the modified files to carry prominent notices
    2.99 +    stating that you changed the files and the date of any change.
   2.100 +
   2.101 +    b) You must cause any work that you distribute or publish, that in
   2.102 +    whole or in part contains or is derived from the Program or any
   2.103 +    part thereof, to be licensed as a whole at no charge to all third
   2.104 +    parties under the terms of this License.
   2.105 +
   2.106 +    c) If the modified program normally reads commands interactively
   2.107 +    when run, you must cause it, when started running for such
   2.108 +    interactive use in the most ordinary way, to print or display an
   2.109 +    announcement including an appropriate copyright notice and a
   2.110 +    notice that there is no warranty (or else, saying that you provide
   2.111 +    a warranty) and that users may redistribute the program under
   2.112 +    these conditions, and telling the user how to view a copy of this
   2.113 +    License.  (Exception: if the Program itself is interactive but
   2.114 +    does not normally print such an announcement, your work based on
   2.115 +    the Program is not required to print an announcement.)
   2.116 +
   2.117 +These requirements apply to the modified work as a whole.  If
   2.118 +identifiable sections of that work are not derived from the Program,
   2.119 +and can be reasonably considered independent and separate works in
   2.120 +themselves, then this License, and its terms, do not apply to those
   2.121 +sections when you distribute them as separate works.  But when you
   2.122 +distribute the same sections as part of a whole which is a work based
   2.123 +on the Program, the distribution of the whole must be on the terms of
   2.124 +this License, whose permissions for other licensees extend to the
   2.125 +entire whole, and thus to each and every part regardless of who wrote it.
   2.126 +
   2.127 +Thus, it is not the intent of this section to claim rights or contest
   2.128 +your rights to work written entirely by you; rather, the intent is to
   2.129 +exercise the right to control the distribution of derivative or
   2.130 +collective works based on the Program.
   2.131 +
   2.132 +In addition, mere aggregation of another work not based on the Program
   2.133 +with the Program (or with a work based on the Program) on a volume of
   2.134 +a storage or distribution medium does not bring the other work under
   2.135 +the scope of this License.
   2.136 +
   2.137 +  3. You may copy and distribute the Program (or a work based on it,
   2.138 +under Section 2) in object code or executable form under the terms of
   2.139 +Sections 1 and 2 above provided that you also do one of the following:
   2.140 +
   2.141 +    a) Accompany it with the complete corresponding machine-readable
   2.142 +    source code, which must be distributed under the terms of Sections
   2.143 +    1 and 2 above on a medium customarily used for software interchange; or,
   2.144 +
   2.145 +    b) Accompany it with a written offer, valid for at least three
   2.146 +    years, to give any third party, for a charge no more than your
   2.147 +    cost of physically performing source distribution, a complete
   2.148 +    machine-readable copy of the corresponding source code, to be
   2.149 +    distributed under the terms of Sections 1 and 2 above on a medium
   2.150 +    customarily used for software interchange; or,
   2.151 +
   2.152 +    c) Accompany it with the information you received as to the offer
   2.153 +    to distribute corresponding source code.  (This alternative is
   2.154 +    allowed only for noncommercial distribution and only if you
   2.155 +    received the program in object code or executable form with such
   2.156 +    an offer, in accord with Subsection b above.)
   2.157 +
   2.158 +The source code for a work means the preferred form of the work for
   2.159 +making modifications to it.  For an executable work, complete source
   2.160 +code means all the source code for all modules it contains, plus any
   2.161 +associated interface definition files, plus the scripts used to
   2.162 +control compilation and installation of the executable.  However, as a
   2.163 +special exception, the source code distributed need not include
   2.164 +anything that is normally distributed (in either source or binary
   2.165 +form) with the major components (compiler, kernel, and so on) of the
   2.166 +operating system on which the executable runs, unless that component
   2.167 +itself accompanies the executable.
   2.168 +
   2.169 +If distribution of executable or object code is made by offering
   2.170 +access to copy from a designated place, then offering equivalent
   2.171 +access to copy the source code from the same place counts as
   2.172 +distribution of the source code, even though third parties are not
   2.173 +compelled to copy the source along with the object code.
   2.174 +
   2.175 +  4. You may not copy, modify, sublicense, or distribute the Program
   2.176 +except as expressly provided under this License.  Any attempt
   2.177 +otherwise to copy, modify, sublicense or distribute the Program is
   2.178 +void, and will automatically terminate your rights under this License.
   2.179 +However, parties who have received copies, or rights, from you under
   2.180 +this License will not have their licenses terminated so long as such
   2.181 +parties remain in full compliance.
   2.182 +
   2.183 +  5. You are not required to accept this License, since you have not
   2.184 +signed it.  However, nothing else grants you permission to modify or
   2.185 +distribute the Program or its derivative works.  These actions are
   2.186 +prohibited by law if you do not accept this License.  Therefore, by
   2.187 +modifying or distributing the Program (or any work based on the
   2.188 +Program), you indicate your acceptance of this License to do so, and
   2.189 +all its terms and conditions for copying, distributing or modifying
   2.190 +the Program or works based on it.
   2.191 +
   2.192 +  6. Each time you redistribute the Program (or any work based on the
   2.193 +Program), the recipient automatically receives a license from the
   2.194 +original licensor to copy, distribute or modify the Program subject to
   2.195 +these terms and conditions.  You may not impose any further
   2.196 +restrictions on the recipients' exercise of the rights granted herein.
   2.197 +You are not responsible for enforcing compliance by third parties to
   2.198 +this License.
   2.199 +
   2.200 +  7. If, as a consequence of a court judgment or allegation of patent
   2.201 +infringement or for any other reason (not limited to patent issues),
   2.202 +conditions are imposed on you (whether by court order, agreement or
   2.203 +otherwise) that contradict the conditions of this License, they do not
   2.204 +excuse you from the conditions of this License.  If you cannot
   2.205 +distribute so as to satisfy simultaneously your obligations under this
   2.206 +License and any other pertinent obligations, then as a consequence you
   2.207 +may not distribute the Program at all.  For example, if a patent
   2.208 +license would not permit royalty-free redistribution of the Program by
   2.209 +all those who receive copies directly or indirectly through you, then
   2.210 +the only way you could satisfy both it and this License would be to
   2.211 +refrain entirely from distribution of the Program.
   2.212 +
   2.213 +If any portion of this section is held invalid or unenforceable under
   2.214 +any particular circumstance, the balance of the section is intended to
   2.215 +apply and the section as a whole is intended to apply in other
   2.216 +circumstances.
   2.217 +
   2.218 +It is not the purpose of this section to induce you to infringe any
   2.219 +patents or other property right claims or to contest validity of any
   2.220 +such claims; this section has the sole purpose of protecting the
   2.221 +integrity of the free software distribution system, which is
   2.222 +implemented by public license practices.  Many people have made
   2.223 +generous contributions to the wide range of software distributed
   2.224 +through that system in reliance on consistent application of that
   2.225 +system; it is up to the author/donor to decide if he or she is willing
   2.226 +to distribute software through any other system and a licensee cannot
   2.227 +impose that choice.
   2.228 +
   2.229 +This section is intended to make thoroughly clear what is believed to
   2.230 +be a consequence of the rest of this License.
   2.231 +
   2.232 +  8. If the distribution and/or use of the Program is restricted in
   2.233 +certain countries either by patents or by copyrighted interfaces, the
   2.234 +original copyright holder who places the Program under this License
   2.235 +may add an explicit geographical distribution limitation excluding
   2.236 +those countries, so that distribution is permitted only in or among
   2.237 +countries not thus excluded.  In such case, this License incorporates
   2.238 +the limitation as if written in the body of this License.
   2.239 +
   2.240 +  9. The Free Software Foundation may publish revised and/or new versions
   2.241 +of the General Public License from time to time.  Such new versions will
   2.242 +be similar in spirit to the present version, but may differ in detail to
   2.243 +address new problems or concerns.
   2.244 +
   2.245 +Each version is given a distinguishing version number.  If the Program
   2.246 +specifies a version number of this License which applies to it and "any
   2.247 +later version", you have the option of following the terms and conditions
   2.248 +either of that version or of any later version published by the Free
   2.249 +Software Foundation.  If the Program does not specify a version number of
   2.250 +this License, you may choose any version ever published by the Free Software
   2.251 +Foundation.
   2.252 +
   2.253 +  10. If you wish to incorporate parts of the Program into other free
   2.254 +programs whose distribution conditions are different, write to the author
   2.255 +to ask for permission.  For software which is copyrighted by the Free
   2.256 +Software Foundation, write to the Free Software Foundation; we sometimes
   2.257 +make exceptions for this.  Our decision will be guided by the two goals
   2.258 +of preserving the free status of all derivatives of our free software and
   2.259 +of promoting the sharing and reuse of software generally.
   2.260 +
   2.261 +			    NO WARRANTY
   2.262 +
   2.263 +  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
   2.264 +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
   2.265 +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
   2.266 +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
   2.267 +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
   2.268 +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
   2.269 +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
   2.270 +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
   2.271 +REPAIR OR CORRECTION.
   2.272 +
   2.273 +  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
   2.274 +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
   2.275 +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
   2.276 +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
   2.277 +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
   2.278 +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
   2.279 +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
   2.280 +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
   2.281 +POSSIBILITY OF SUCH DAMAGES.
   2.282 +
   2.283 +		     END OF TERMS AND CONDITIONS
   2.284 +
   2.285 +	    How to Apply These Terms to Your New Programs
   2.286 +
   2.287 +  If you develop a new program, and you want it to be of the greatest
   2.288 +possible use to the public, the best way to achieve this is to make it
   2.289 +free software which everyone can redistribute and change under these terms.
   2.290 +
   2.291 +  To do so, attach the following notices to the program.  It is safest
   2.292 +to attach them to the start of each source file to most effectively
   2.293 +convey the exclusion of warranty; and each file should have at least
   2.294 +the "copyright" line and a pointer to where the full notice is found.
   2.295 +
   2.296 +    <one line to give the program's name and a brief idea of what it does.>
   2.297 +    Copyright (C) <year>  <name of author>
   2.298 +
   2.299 +    This program is free software; you can redistribute it and/or modify
   2.300 +    it under the terms of the GNU General Public License as published by
   2.301 +    the Free Software Foundation; either version 2 of the License, or
   2.302 +    (at your option) any later version.
   2.303 +
   2.304 +    This program is distributed in the hope that it will be useful,
   2.305 +    but WITHOUT ANY WARRANTY; without even the implied warranty of
   2.306 +    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   2.307 +    GNU General Public License for more details.
   2.308 +
   2.309 +    You should have received a copy of the GNU General Public License
   2.310 +    along with this program; if not, write to the Free Software
   2.311 +    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   2.312 +
   2.313 +
   2.314 +Also add information on how to contact you by electronic and paper mail.
   2.315 +
   2.316 +If the program is interactive, make it output a short notice like this
   2.317 +when it starts in an interactive mode:
   2.318 +
   2.319 +    Gnomovision version 69, Copyright (C) year name of author
   2.320 +    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
   2.321 +    This is free software, and you are welcome to redistribute it
   2.322 +    under certain conditions; type `show c' for details.
   2.323 +
   2.324 +The hypothetical commands `show w' and `show c' should show the appropriate
   2.325 +parts of the General Public License.  Of course, the commands you use may
   2.326 +be called something other than `show w' and `show c'; they could even be
   2.327 +mouse-clicks or menu items--whatever suits your program.
   2.328 +
   2.329 +You should also get your employer (if you work as a programmer) or your
   2.330 +school, if any, to sign a "copyright disclaimer" for the program, if
   2.331 +necessary.  Here is a sample; alter the names:
   2.332 +
   2.333 +  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
   2.334 +  `Gnomovision' (which makes passes at compilers) written by James Hacker.
   2.335 +
   2.336 +  <signature of Ty Coon>, 1 April 1989
   2.337 +  Ty Coon, President of Vice
   2.338 +
   2.339 +This General Public License does not permit incorporating your program into
   2.340 +proprietary programs.  If your program is a subroutine library, you may
   2.341 +consider it more useful to permit linking proprietary applications with the
   2.342 +library.  If this is what you want to do, use the GNU Library General
   2.343 +Public License instead of this License.
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/INSTALL	Tue Jan 24 23:54:05 2012 +0000
     3.3 @@ -0,0 +1,365 @@
     3.4 +Installation Instructions
     3.5 +*************************
     3.6 +
     3.7 +Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
     3.8 +2006, 2007, 2008, 2009 Free Software Foundation, Inc.
     3.9 +
    3.10 +   Copying and distribution of this file, with or without modification,
    3.11 +are permitted in any medium without royalty provided the copyright
    3.12 +notice and this notice are preserved.  This file is offered as-is,
    3.13 +without warranty of any kind.
    3.14 +
    3.15 +Basic Installation
    3.16 +==================
    3.17 +
    3.18 +   Briefly, the shell commands `./configure; make; make install' should
    3.19 +configure, build, and install this package.  The following
    3.20 +more-detailed instructions are generic; see the `README' file for
    3.21 +instructions specific to this package.  Some packages provide this
    3.22 +`INSTALL' file but do not implement all of the features documented
    3.23 +below.  The lack of an optional feature in a given package is not
    3.24 +necessarily a bug.  More recommendations for GNU packages can be found
    3.25 +in *note Makefile Conventions: (standards)Makefile Conventions.
    3.26 +
    3.27 +   The `configure' shell script attempts to guess correct values for
    3.28 +various system-dependent variables used during compilation.  It uses
    3.29 +those values to create a `Makefile' in each directory of the package.
    3.30 +It may also create one or more `.h' files containing system-dependent
    3.31 +definitions.  Finally, it creates a shell script `config.status' that
    3.32 +you can run in the future to recreate the current configuration, and a
    3.33 +file `config.log' containing compiler output (useful mainly for
    3.34 +debugging `configure').
    3.35 +
    3.36 +   It can also use an optional file (typically called `config.cache'
    3.37 +and enabled with `--cache-file=config.cache' or simply `-C') that saves
    3.38 +the results of its tests to speed up reconfiguring.  Caching is
    3.39 +disabled by default to prevent problems with accidental use of stale
    3.40 +cache files.
    3.41 +
    3.42 +   If you need to do unusual things to compile the package, please try
    3.43 +to figure out how `configure' could check whether to do them, and mail
    3.44 +diffs or instructions to the address given in the `README' so they can
    3.45 +be considered for the next release.  If you are using the cache, and at
    3.46 +some point `config.cache' contains results you don't want to keep, you
    3.47 +may remove or edit it.
    3.48 +
    3.49 +   The file `configure.ac' (or `configure.in') is used to create
    3.50 +`configure' by a program called `autoconf'.  You need `configure.ac' if
    3.51 +you want to change it or regenerate `configure' using a newer version
    3.52 +of `autoconf'.
    3.53 +
    3.54 +   The simplest way to compile this package is:
    3.55 +
    3.56 +  1. `cd' to the directory containing the package's source code and type
    3.57 +     `./configure' to configure the package for your system.
    3.58 +
    3.59 +     Running `configure' might take a while.  While running, it prints
    3.60 +     some messages telling which features it is checking for.
    3.61 +
    3.62 +  2. Type `make' to compile the package.
    3.63 +
    3.64 +  3. Optionally, type `make check' to run any self-tests that come with
    3.65 +     the package, generally using the just-built uninstalled binaries.
    3.66 +
    3.67 +  4. Type `make install' to install the programs and any data files and
    3.68 +     documentation.  When installing into a prefix owned by root, it is
    3.69 +     recommended that the package be configured and built as a regular
    3.70 +     user, and only the `make install' phase executed with root
    3.71 +     privileges.
    3.72 +
    3.73 +  5. Optionally, type `make installcheck' to repeat any self-tests, but
    3.74 +     this time using the binaries in their final installed location.
    3.75 +     This target does not install anything.  Running this target as a
    3.76 +     regular user, particularly if the prior `make install' required
    3.77 +     root privileges, verifies that the installation completed
    3.78 +     correctly.
    3.79 +
    3.80 +  6. You can remove the program binaries and object files from the
    3.81 +     source code directory by typing `make clean'.  To also remove the
    3.82 +     files that `configure' created (so you can compile the package for
    3.83 +     a different kind of computer), type `make distclean'.  There is
    3.84 +     also a `make maintainer-clean' target, but that is intended mainly
    3.85 +     for the package's developers.  If you use it, you may have to get
    3.86 +     all sorts of other programs in order to regenerate files that came
    3.87 +     with the distribution.
    3.88 +
    3.89 +  7. Often, you can also type `make uninstall' to remove the installed
    3.90 +     files again.  In practice, not all packages have tested that
    3.91 +     uninstallation works correctly, even though it is required by the
    3.92 +     GNU Coding Standards.
    3.93 +
    3.94 +  8. Some packages, particularly those that use Automake, provide `make
    3.95 +     distcheck', which can be used by developers to test that all other
    3.96 +     targets like `make install' and `make uninstall' work correctly.
    3.97 +     This target is generally not run by end users.
    3.98 +
    3.99 +Compilers and Options
   3.100 +=====================
   3.101 +
   3.102 +   Some systems require unusual options for compilation or linking that
   3.103 +the `configure' script does not know about.  Run `./configure --help'
   3.104 +for details on some of the pertinent environment variables.
   3.105 +
   3.106 +   You can give `configure' initial values for configuration parameters
   3.107 +by setting variables in the command line or in the environment.  Here
   3.108 +is an example:
   3.109 +
   3.110 +     ./configure CC=c99 CFLAGS=-g LIBS=-lposix
   3.111 +
   3.112 +   *Note Defining Variables::, for more details.
   3.113 +
   3.114 +Compiling For Multiple Architectures
   3.115 +====================================
   3.116 +
   3.117 +   You can compile the package for more than one kind of computer at the
   3.118 +same time, by placing the object files for each architecture in their
   3.119 +own directory.  To do this, you can use GNU `make'.  `cd' to the
   3.120 +directory where you want the object files and executables to go and run
   3.121 +the `configure' script.  `configure' automatically checks for the
   3.122 +source code in the directory that `configure' is in and in `..'.  This
   3.123 +is known as a "VPATH" build.
   3.124 +
   3.125 +   With a non-GNU `make', it is safer to compile the package for one
   3.126 +architecture at a time in the source code directory.  After you have
   3.127 +installed the package for one architecture, use `make distclean' before
   3.128 +reconfiguring for another architecture.
   3.129 +
   3.130 +   On MacOS X 10.5 and later systems, you can create libraries and
   3.131 +executables that work on multiple system types--known as "fat" or
   3.132 +"universal" binaries--by specifying multiple `-arch' options to the
   3.133 +compiler but only a single `-arch' option to the preprocessor.  Like
   3.134 +this:
   3.135 +
   3.136 +     ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
   3.137 +                 CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
   3.138 +                 CPP="gcc -E" CXXCPP="g++ -E"
   3.139 +
   3.140 +   This is not guaranteed to produce working output in all cases; you
   3.141 +may have to build one architecture at a time and combine the results
   3.142 +using the `lipo' tool if you have problems.
   3.143 +
   3.144 +Installation Names
   3.145 +==================
   3.146 +
   3.147 +   By default, `make install' installs the package's commands under
   3.148 +`/usr/local/bin', include files under `/usr/local/include', etc.  You
   3.149 +can specify an installation prefix other than `/usr/local' by giving
   3.150 +`configure' the option `--prefix=PREFIX', where PREFIX must be an
   3.151 +absolute file name.
   3.152 +
   3.153 +   You can specify separate installation prefixes for
   3.154 +architecture-specific files and architecture-independent files.  If you
   3.155 +pass the option `--exec-prefix=PREFIX' to `configure', the package uses
   3.156 +PREFIX as the prefix for installing programs and libraries.
   3.157 +Documentation and other data files still use the regular prefix.
   3.158 +
   3.159 +   In addition, if you use an unusual directory layout you can give
   3.160 +options like `--bindir=DIR' to specify different values for particular
   3.161 +kinds of files.  Run `configure --help' for a list of the directories
   3.162 +you can set and what kinds of files go in them.  In general, the
   3.163 +default for these options is expressed in terms of `${prefix}', so that
   3.164 +specifying just `--prefix' will affect all of the other directory
   3.165 +specifications that were not explicitly provided.
   3.166 +
   3.167 +   The most portable way to affect installation locations is to pass the
   3.168 +correct locations to `configure'; however, many packages provide one or
   3.169 +both of the following shortcuts of passing variable assignments to the
   3.170 +`make install' command line to change installation locations without
   3.171 +having to reconfigure or recompile.
   3.172 +
   3.173 +   The first method involves providing an override variable for each
   3.174 +affected directory.  For example, `make install
   3.175 +prefix=/alternate/directory' will choose an alternate location for all
   3.176 +directory configuration variables that were expressed in terms of
   3.177 +`${prefix}'.  Any directories that were specified during `configure',
   3.178 +but not in terms of `${prefix}', must each be overridden at install
   3.179 +time for the entire installation to be relocated.  The approach of
   3.180 +makefile variable overrides for each directory variable is required by
   3.181 +the GNU Coding Standards, and ideally causes no recompilation.
   3.182 +However, some platforms have known limitations with the semantics of
   3.183 +shared libraries that end up requiring recompilation when using this
   3.184 +method, particularly noticeable in packages that use GNU Libtool.
   3.185 +
   3.186 +   The second method involves providing the `DESTDIR' variable.  For
   3.187 +example, `make install DESTDIR=/alternate/directory' will prepend
   3.188 +`/alternate/directory' before all installation names.  The approach of
   3.189 +`DESTDIR' overrides is not required by the GNU Coding Standards, and
   3.190 +does not work on platforms that have drive letters.  On the other hand,
   3.191 +it does better at avoiding recompilation issues, and works well even
   3.192 +when some directory options were not specified in terms of `${prefix}'
   3.193 +at `configure' time.
   3.194 +
   3.195 +Optional Features
   3.196 +=================
   3.197 +
   3.198 +   If the package supports it, you can cause programs to be installed
   3.199 +with an extra prefix or suffix on their names by giving `configure' the
   3.200 +option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
   3.201 +
   3.202 +   Some packages pay attention to `--enable-FEATURE' options to
   3.203 +`configure', where FEATURE indicates an optional part of the package.
   3.204 +They may also pay attention to `--with-PACKAGE' options, where PACKAGE
   3.205 +is something like `gnu-as' or `x' (for the X Window System).  The
   3.206 +`README' should mention any `--enable-' and `--with-' options that the
   3.207 +package recognizes.
   3.208 +
   3.209 +   For packages that use the X Window System, `configure' can usually
   3.210 +find the X include and library files automatically, but if it doesn't,
   3.211 +you can use the `configure' options `--x-includes=DIR' and
   3.212 +`--x-libraries=DIR' to specify their locations.
   3.213 +
   3.214 +   Some packages offer the ability to configure how verbose the
   3.215 +execution of `make' will be.  For these packages, running `./configure
   3.216 +--enable-silent-rules' sets the default to minimal output, which can be
   3.217 +overridden with `make V=1'; while running `./configure
   3.218 +--disable-silent-rules' sets the default to verbose, which can be
   3.219 +overridden with `make V=0'.
   3.220 +
   3.221 +Particular systems
   3.222 +==================
   3.223 +
   3.224 +   On HP-UX, the default C compiler is not ANSI C compatible.  If GNU
   3.225 +CC is not installed, it is recommended to use the following options in
   3.226 +order to use an ANSI C compiler:
   3.227 +
   3.228 +     ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
   3.229 +
   3.230 +and if that doesn't work, install pre-built binaries of GCC for HP-UX.
   3.231 +
   3.232 +   On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
   3.233 +parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
   3.234 +a workaround.  If GNU CC is not installed, it is therefore recommended
   3.235 +to try
   3.236 +
   3.237 +     ./configure CC="cc"
   3.238 +
   3.239 +and if that doesn't work, try
   3.240 +
   3.241 +     ./configure CC="cc -nodtk"
   3.242 +
   3.243 +   On Solaris, don't put `/usr/ucb' early in your `PATH'.  This
   3.244 +directory contains several dysfunctional programs; working variants of
   3.245 +these programs are available in `/usr/bin'.  So, if you need `/usr/ucb'
   3.246 +in your `PATH', put it _after_ `/usr/bin'.
   3.247 +
   3.248 +   On Haiku, software installed for all users goes in `/boot/common',
   3.249 +not `/usr/local'.  It is recommended to use the following options:
   3.250 +
   3.251 +     ./configure --prefix=/boot/common
   3.252 +
   3.253 +Specifying the System Type
   3.254 +==========================
   3.255 +
   3.256 +   There may be some features `configure' cannot figure out
   3.257 +automatically, but needs to determine by the type of machine the package
   3.258 +will run on.  Usually, assuming the package is built to be run on the
   3.259 +_same_ architectures, `configure' can figure that out, but if it prints
   3.260 +a message saying it cannot guess the machine type, give it the
   3.261 +`--build=TYPE' option.  TYPE can either be a short name for the system
   3.262 +type, such as `sun4', or a canonical name which has the form:
   3.263 +
   3.264 +     CPU-COMPANY-SYSTEM
   3.265 +
   3.266 +where SYSTEM can have one of these forms:
   3.267 +
   3.268 +     OS
   3.269 +     KERNEL-OS
   3.270 +
   3.271 +   See the file `config.sub' for the possible values of each field.  If
   3.272 +`config.sub' isn't included in this package, then this package doesn't
   3.273 +need to know the machine type.
   3.274 +
   3.275 +   If you are _building_ compiler tools for cross-compiling, you should
   3.276 +use the option `--target=TYPE' to select the type of system they will
   3.277 +produce code for.
   3.278 +
   3.279 +   If you want to _use_ a cross compiler, that generates code for a
   3.280 +platform different from the build platform, you should specify the
   3.281 +"host" platform (i.e., that on which the generated programs will
   3.282 +eventually be run) with `--host=TYPE'.
   3.283 +
   3.284 +Sharing Defaults
   3.285 +================
   3.286 +
   3.287 +   If you want to set default values for `configure' scripts to share,
   3.288 +you can create a site shell script called `config.site' that gives
   3.289 +default values for variables like `CC', `cache_file', and `prefix'.
   3.290 +`configure' looks for `PREFIX/share/config.site' if it exists, then
   3.291 +`PREFIX/etc/config.site' if it exists.  Or, you can set the
   3.292 +`CONFIG_SITE' environment variable to the location of the site script.
   3.293 +A warning: not all `configure' scripts look for a site script.
   3.294 +
   3.295 +Defining Variables
   3.296 +==================
   3.297 +
   3.298 +   Variables not defined in a site shell script can be set in the
   3.299 +environment passed to `configure'.  However, some packages may run
   3.300 +configure again during the build, and the customized values of these
   3.301 +variables may be lost.  In order to avoid this problem, you should set
   3.302 +them in the `configure' command line, using `VAR=value'.  For example:
   3.303 +
   3.304 +     ./configure CC=/usr/local2/bin/gcc
   3.305 +
   3.306 +causes the specified `gcc' to be used as the C compiler (unless it is
   3.307 +overridden in the site shell script).
   3.308 +
   3.309 +Unfortunately, this technique does not work for `CONFIG_SHELL' due to
   3.310 +an Autoconf bug.  Until the bug is fixed you can use this workaround:
   3.311 +
   3.312 +     CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
   3.313 +
   3.314 +`configure' Invocation
   3.315 +======================
   3.316 +
   3.317 +   `configure' recognizes the following options to control how it
   3.318 +operates.
   3.319 +
   3.320 +`--help'
   3.321 +`-h'
   3.322 +     Print a summary of all of the options to `configure', and exit.
   3.323 +
   3.324 +`--help=short'
   3.325 +`--help=recursive'
   3.326 +     Print a summary of the options unique to this package's
   3.327 +     `configure', and exit.  The `short' variant lists options used
   3.328 +     only in the top level, while the `recursive' variant lists options
   3.329 +     also present in any nested packages.
   3.330 +
   3.331 +`--version'
   3.332 +`-V'
   3.333 +     Print the version of Autoconf used to generate the `configure'
   3.334 +     script, and exit.
   3.335 +
   3.336 +`--cache-file=FILE'
   3.337 +     Enable the cache: use and save the results of the tests in FILE,
   3.338 +     traditionally `config.cache'.  FILE defaults to `/dev/null' to
   3.339 +     disable caching.
   3.340 +
   3.341 +`--config-cache'
   3.342 +`-C'
   3.343 +     Alias for `--cache-file=config.cache'.
   3.344 +
   3.345 +`--quiet'
   3.346 +`--silent'
   3.347 +`-q'
   3.348 +     Do not print messages saying which checks are being made.  To
   3.349 +     suppress all normal output, redirect it to `/dev/null' (any error
   3.350 +     messages will still be shown).
   3.351 +
   3.352 +`--srcdir=DIR'
   3.353 +     Look for the package's source code in directory DIR.  Usually
   3.354 +     `configure' can determine that directory automatically.
   3.355 +
   3.356 +`--prefix=DIR'
   3.357 +     Use DIR as the installation prefix.  *note Installation Names::
   3.358 +     for more details, including other options available for fine-tuning
   3.359 +     the installation locations.
   3.360 +
   3.361 +`--no-create'
   3.362 +`-n'
   3.363 +     Run the configure checks, but stop before creating any output
   3.364 +     files.
   3.365 +
   3.366 +`configure' also accepts some other, not widely useful, options.  Run
   3.367 +`configure --help' for more details.
   3.368 +
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
     4.3 @@ -0,0 +1,1 @@
     4.4 +SUBDIRS=gclib gutcheck test doc
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/README	Tue Jan 24 23:54:05 2012 +0000
     5.3 @@ -0,0 +1,68 @@
     5.4 +                                   gutcheck
     5.5 +                                   ========
     5.6 +
     5.7 +General installation instructions can be found in INSTALL. The following
     5.8 +aim to give a quick overview and some help for specific systems. Documentation
     5.9 +for gutcheck itself can be found in doc/gutcheck.txt and for the test
    5.10 +framework in doc/gc-test.txt.
    5.11 +
    5.12 +Linux
    5.13 +-----
    5.14 +
    5.15 +You should be able to use the standard:
    5.16 +
    5.17 +% ./configure
    5.18 +% make
    5.19 +% sudo make install
    5.20 +
    5.21 +If you get an error about no package 'glib-2.0' found, then you need to
    5.22 +install the development package for glib2. Under Fedora, RHEL and friends
    5.23 +that would be:
    5.24 +
    5.25 +% sudo yum install gcc pkgconfig glib2-devel
    5.26 +
    5.27 +Under Debian, Ubuntu and friends that would be:
    5.28 +
    5.29 +% sudo apt-get install gcc pkg-config libglib2.0-dev
    5.30 +
    5.31 +If you get really stuck, you can use the --without-glib option to configure,
    5.32 +but this may well not be supported in a future version so this is probably
    5.33 +best avoided.
    5.34 +
    5.35 +Microsoft Windows
    5.36 +-----------------
    5.37 +
    5.38 +It should be possible to use MSYS (http://www.mingw.org/wiki/MSYS) to build
    5.39 +on a Windows machine. You'll need a copy of the development package for
    5.40 +glib and its dependencies from http://www.gtk.org/download/win32.php.
    5.41 +
    5.42 +It's much easier to build using a cross-compiler from Linux, if you have
    5.43 +access to such a system. Under Fedora, RHEL and friends you can do this
    5.44 +with:
    5.45 +
    5.46 +% sudo yum install mingw32-gcc pkgconfig mingw32-glib2-static \
    5.47 +  mingw32-gettext-static mingw32-iconv-static
    5.48 +% ./configure --host=i686-w64-mingw32 --disable-shared \
    5.49 +  --bindir=/gutcheck --datadir=/
    5.50 +% make
    5.51 +% mkdir build
    5.52 +% make install DESTDIR=`pwd`/build
    5.53 +
    5.54 +The contents of the build/gutcheck directory can then be copied to a
    5.55 +Microsoft Windows machine.
    5.56 +
    5.57 +Depending on the version of mingw32-gcc you use, you may need to specify a
    5.58 +different host type. If you're not sure look and see what the cross-compiler
    5.59 +is called (eg., i686-pc-mingw32-gcc) and use the prefix as the host type.
    5.60 +
    5.61 +Mac
    5.62 +---
    5.63 +
    5.64 +I think this should be quite similar to Linux, doing something like this:
    5.65 +
    5.66 +% sudo port install gcc pkgconfig glib2-devel
    5.67 +% ./configure
    5.68 +% make
    5.69 +% sudo make install
    5.70 +
    5.71 +It may also be possible to use fink instead of macports.
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/bootstrap.sh	Tue Jan 24 23:54:05 2012 +0000
     6.3 @@ -0,0 +1,6 @@
     6.4 +#!/bin/sh
     6.5 +mkdir -p config
     6.6 +aclocal && \
     6.7 +  libtoolize && \
     6.8 +  automake --foreign --add-missing && \
     6.9 +  autoconf
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/configure.ac	Tue Jan 24 23:54:05 2012 +0000
     7.3 @@ -0,0 +1,94 @@
     7.4 +#                                               -*- Autoconf -*-
     7.5 +# Process this file with autoconf to produce a configure script.
     7.6 +
     7.7 +AC_INIT([gutcheck],[1.50],[ali@juiblex.co.uk])
     7.8 +AC_PREREQ(2.59)
     7.9 +AC_CONFIG_AUX_DIR([config])
    7.10 +AC_CONFIG_SRCDIR([gutcheck/gutcheck.c])
    7.11 +AC_CONFIG_FILES([Makefile
    7.12 +gclib/Makefile
    7.13 +gutcheck/Makefile
    7.14 +test/Makefile
    7.15 +test/harness/Makefile
    7.16 +test/compatibility/Makefile
    7.17 +doc/Makefile
    7.18 +])
    7.19 +AM_INIT_AUTOMAKE(no-define)
    7.20 +AC_CANONICAL_HOST
    7.21 +
    7.22 +##################################################
    7.23 +# Checks for programs.
    7.24 +##################################################
    7.25 +AC_PROG_CC
    7.26 +LT_INIT
    7.27 +# Libtool supports a --disable-shared option to tell it to avoid
    7.28 +# building shared versions of libraries. We don't have any libraries
    7.29 +# but we do want to support building static versions of our executables.
    7.30 +# Libtool can do this (under the right circumstances) so we overload
    7.31 +# this switch for this purpose.
    7.32 +#
    7.33 +# The libtool option that we use (-static-libtool-libs) means to use
    7.34 +# static linking with libraries that supply a .la file and which
    7.35 +# include a non-empty value for "old_library". If the library doesn't
    7.36 +# include a .la file (they are deleted by some distributions), then
    7.37 +# this option will have no effect and likewise if old_library is set
    7.38 +# to '' (eg., if the library was built with --disable-static) then
    7.39 +# again -static-libtool-libs will have no effect.
    7.40 +#
    7.41 +# If old_library is set to a non-empty value, then specifying
    7.42 +# -static-libtool-libs will cause the link to fail if the old library
    7.43 +# cannot be found (libtool will not fall back to a shared library
    7.44 +# in these circumstances). This can happen with Fedora, for example,
    7.45 +# if a main mingw32 library package is installed but not the
    7.46 +# corresponding static sub-package. The solution is to either
    7.47 +# install the relevant static sub-packages or don't use --disable-shared.
    7.48 +AS_IF([test "$enable_shared" = no],[
    7.49 +  LDFLAGS="$LDFLAGS -static-libtool-libs"
    7.50 +])
    7.51 +PKG_PROG_PKG_CONFIG
    7.52 +
    7.53 +##################################################
    7.54 +# Checks for header files.
    7.55 +##################################################
    7.56 +
    7.57 +##################################################
    7.58 +# Checks for typedefs, structures, and compiler characteristics.
    7.59 +##################################################
    7.60 +
    7.61 +##################################################
    7.62 +# Checks for libraries.
    7.63 +##################################################
    7.64 +AC_MSG_CHECKING([whether to use glib])
    7.65 +AC_ARG_WITH([glib],[AS_HELP_STRING([--without-glib],
    7.66 +  [use internal re-invented wheel rather than glib2])])
    7.67 +AS_IF([test "$with_glib" != no],[
    7.68 +  AC_MSG_RESULT([yes])
    7.69 +  PKG_CHECK_MODULES([GLIB],[glib-2.0])
    7.70 +  AC_DEFINE([HAVE_GLIB],[1],[Define if you have glib version 2.x])
    7.71 +],[
    7.72 +  AC_MSG_RESULT([no])
    7.73 +])
    7.74 +AM_CONDITIONAL([HAVE_GLIB],[test "$with_glib" != no])
    7.75 +
    7.76 +# NOTE: If we are using a static version of glib then we
    7.77 +# should define GLIB_STATIC_COMPILATION. This isn't needed
    7.78 +# when glib is built only for static use (in which case
    7.79 +# glibconfig.h will already define GLIB_STATIC_COMPILATION).
    7.80 +# It's not easy to tell if libtool will actually link with
    7.81 +# a static glib but luckily we don't currently need to;
    7.82 +# this pre-processor define only affects the behaviour of
    7.83 +# libraries which use glib and we don't have any.
    7.84 +
    7.85 +##################################################
    7.86 +# Checks for library functions.
    7.87 +##################################################
    7.88 +AC_CHECK_FUNCS_ONCE([mkstemp])
    7.89 +
    7.90 +##################################################
    7.91 +# Checks for processor independent files.
    7.92 +##################################################
    7.93 +
    7.94 +##################################################
    7.95 +# Generate the various configured files
    7.96 +##################################################
    7.97 +AC_OUTPUT
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/doc/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
     8.3 @@ -0,0 +1,3 @@
     8.4 +dist_pkgdata_DATA=gutcheck.txt gc-test.txt
     8.5 +
     8.6 +EXTRA_DIST=README-0.99
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/doc/README-0.99	Tue Jan 24 23:54:05 2012 +0000
     9.3 @@ -0,0 +1,24 @@
     9.4 +RELEASE NOTES FOR GUTCHECK 0.99 20051105
     9.5 +----------------------------------------
     9.6 +
     9.7 +This is the README file for Gutcheck.
     9.8 +
     9.9 +Gutcheck is a command-line tool for finding problems in
    9.10 +files for submission to Project Gutenberg.
    9.11 +
    9.12 +You should have received the following files:
    9.13 +
    9.14 +    GUTCHECK.EXE    MS-DOS Executable
    9.15 +    gutcheck.txt    Documentation
    9.16 +    gutcheck.c      Source code
    9.17 +    gutcheck.typ    A sample typo file
    9.18 +    README          This file
    9.19 +    COPYING         A copy of the GNU GPL licence
    9.20 +
    9.21 +This program is free software, without warranty of any kind,
    9.22 +licensed under the GNU GPL.  A copy of the GNU GPL, entitled
    9.23 +'COPYING' should be present.  If not, you can find one at 
    9.24 +http://www.fsf.org.
    9.25 +
    9.26 +Gutcheck was written by Jim Tinsley, who can be reached at 
    9.27 +jtinsley@pobox.com, or via http://gutcheck.sourceforge.net.
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/doc/gc-test.txt	Tue Jan 24 23:54:05 2012 +0000
    10.3 @@ -0,0 +1,64 @@
    10.4 +                            gutcheck test framework
    10.5 +                            =======================
    10.6 +
    10.7 +Running existing testcases
    10.8 +--------------------------
    10.9 +
   10.10 +The test harness (the program that runs a test) is called gc-test. The various
   10.11 +testcases are stored in multiple text files, typically with a .tst extension.
   10.12 +
   10.13 +To run a testcase when all of gutcheck, gc-test and the testcase file are
   10.14 +in the current directory simply do something like:
   10.15 +
   10.16 +% gc-test missing-space.tst
   10.17 +
   10.18 +from a command prompt. Under MS-Windows, this is called a command window and
   10.19 +the prompt will normally look slightly different, eg.,
   10.20 +
   10.21 +C:\DP> gc-test missing-space.tst
   10.22 +
   10.23 +To run all the tests in the current directory, do something like this:
   10.24 +
   10.25 +% gc-test *.tst
   10.26 +
   10.27 +If gutcheck is not in the current directory, then you can set an environment
   10.28 +variable (GUTCHECK) to point at it. For example, on MS-Windows you might do:
   10.29 +
   10.30 +C:\DP> set GUTCHECK=C:\GUTCHECK\GUTCHECK.EXE
   10.31 +C:\DP> gc-test *.tst
   10.32 +
   10.33 +Writing your own testcases
   10.34 +--------------------------
   10.35 +
   10.36 +Writing a new testcase is pretty painless. Most testcases follow this simple
   10.37 +pattern:
   10.38 +
   10.39 +		┌──────────────────────────────────────────┐
   10.40 +		│**************** INPUT ****************   │
   10.41 +		│"Look!John, over there!"                  │
   10.42 +		│**************** EXPECTED ****************│
   10.43 +		│                                          │
   10.44 +		│"Look!John, over there!"                  │
   10.45 +		│    Line 1 column 6 - Missing space?      │
   10.46 +		└──────────────────────────────────────────┘
   10.47 +
   10.48 +The sixteen asterisks in this example form what is known as the "flag". This
   10.49 +flag must come before and after all tags (eg., INPUT and EXPECTED). In the
   10.50 +unlikely event that you need sixteen asterisks at the start of a line of text,
   10.51 +then simply choose a different flag and use it throughout the file (flags
   10.52 +can be any sequence of ASCII characters except control codes and space).
   10.53 +
   10.54 +Note that the header that gutcheck normally outputs is not included in the
   10.55 +expected output. This avoids problems with not knowing beforehand the name
   10.56 +of the file that gutcheck will be asked to look at (and saves typing!).
   10.57 +gutcheck prints a blank line before each warning. These are not part of the
   10.58 +header and so do need to be included.
   10.59 +
   10.60 +To test that gutcheck produces no output, you still need to include
   10.61 +an EXPECTED tag, just with no text following it. If there is no EXPECTED
   10.62 +tag, then gc-test will consider that no expectation exists and won't check
   10.63 +the output at all.
   10.64 +
   10.65 +There is no support yet for non-ASCII testcases, embedded linefeeds,
   10.66 +passing command line options to gutcheck or for testcases which are
   10.67 +expected to fail.
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/doc/gutcheck.txt	Tue Jan 24 23:54:05 2012 +0000
    11.3 @@ -0,0 +1,742 @@
    11.4 +
    11.5 +
    11.6 +                            Gutcheck documentation
    11.7 +
    11.8 +
    11.9 +gutcheck:  lists possible common formatting errors in a Project
   11.10 +Gutenberg candidate file. It is a command line program and can be used
   11.11 +under Win32 or Unix (gutcheck.c should compile anywhere; if it doesn't,
   11.12 +tell me). For Windows-only people, there is an appendix at the end
   11.13 +with brief instructions for running it.
   11.14 +
   11.15 +
   11.16 +Current version: 0.99. Users of 0.98 see end of file for changes.
   11.17 +
   11.18 +You should also have received the licence file COPYING, a README file, 
   11.19 +gutcheck.c, the source code, and gutcheck.exe, a DOS executable, with
   11.20 +this file.
   11.21 +
   11.22 +This software is Copyright Jim Tinsley 2000-2005.
   11.23 +
   11.24 +Gutcheck comes with ABSOLUTELY NO WARRANTY. For details, read the file COPYING.
   11.25 +This is Free Software; you may redistribute it under certain conditions (GPL).
   11.26 +
   11.27 +See http://gutcheck.sourceforge.net for the latest version.
   11.28 +
   11.29 +
   11.30 +Usage is: gutcheck [-setopxlywm] filename
   11.31 +      where:
   11.32 +      -s checks Single quotes 
   11.33 +      -e switches off Echoing of lines 
   11.34 +      -t checks Typos
   11.35 +      -o produces an Overview only
   11.36 +      -p sets strict quotes checking for Paragraphs
   11.37 +      -x (paranoid) switches OFF typo checking and extra checks
   11.38 +      -l turns off Line-end checks
   11.39 +      -y sets error messages to stdout
   11.40 +      -w is a special mode for web uploads (for future use)
   11.41 +      -v (verbose) forces individual reporting of minor problems
   11.42 +      -m interprets Markup of some common HTML tags and entities    
   11.43 +      -u warns about words in a user-defined typo file gutcheck.typ 
   11.44 +      -d ignores some DP-specific markup
   11.45 +
   11.46 +Running gutcheck without any parameters will display a brief help message.
   11.47 +
   11.48 +Sample usage: 
   11.49 +
   11.50 +    gutcheck warpeace.txt
   11.51 +
   11.52 +
   11.53 +More detail:
   11.54 +
   11.55 +    Echoing lines (-e to switch off)
   11.56 +
   11.57 +      You may find it convenient, when reviewing Gutcheck's 
   11.58 +      suggestions, to see the line that Gutcheck is questioning.
   11.59 +      That way, you can often see at a glance whether it is
   11.60 +      a real error that needs to be fixed, or a false positive
   11.61 +      that should be in the text, but Gutcheck's limited
   11.62 +      programming doesn't understand.
   11.63 +
   11.64 +      By default, gutcheck echoes these lines, but if you don't 
   11.65 +      want to see the lines referred to, -e will switch it OFF.
   11.66 +
   11.67 +
   11.68 +    Quotes (-s and -p switches)
   11.69 +
   11.70 +      Gutcheck always looks for unbalanced doublequotes in a 
   11.71 +      paragraph. It is a common convention for writers not to
   11.72 +      close quotes in a paragraph if the next paragraph opens
   11.73 +      with quotes and is a continuation by the same speaker.
   11.74 +
   11.75 +      Gutcheck therefore does not normally report unclosed quotes 
   11.76 +      if the next paragraph begins with a quote. If you need
   11.77 +      to see all unclosed quotes, even where the next paragraph
   11.78 +      begins with a quote, you should use the -p switch.
   11.79 +
   11.80 +      Singlequotes (') are a problem, since the same character
   11.81 +      is used for an apostrophe. I'm not sure that it is 
   11.82 +      possible to get 100% accuracy on singlequotes checking,
   11.83 +      particularly since dialect, quite common in PG texts,
   11.84 +      upsets the normal rules so badly. Consider the sentence:
   11.85 +        'Tis often said that a man's a man for a' that.
   11.86 +      As humans, we recognize that both apostrophes are used
   11.87 +      for contractions rather than quotes, but it isn't easy 
   11.88 +      to get a program to recognize that.
   11.89 +
   11.90 +      Since Gutcheck makes too many mistakes when trying to match
   11.91 +      singlequotes, it doesn't look for unbalanced singlequotes
   11.92 +      unless you specify the -s switch.
   11.93 +
   11.94 +      Consider these sentences, which illustrate the main cases:
   11.95 +
   11.96 +        'Tis often said that a fool and his money are soon parted.
   11.97 +
   11.98 +        'Becky's goin' home,' said Tom.
   11.99 +
  11.100 +        The dogs' tails wagged in unison.
  11.101 +
  11.102 +        Those 'pack dogs' of yours look more like wolves.
  11.103 +
  11.104 +
  11.105 +
  11.106 +    Typos (-t switch)
  11.107 +
  11.108 +      It's not Gutcheck's job to be a spelling checker, but it
  11.109 +      does check for a list of common typos and OCR errors if you
  11.110 +      use the -t switch. (The -x switch turns typo checking off.)
  11.111 +
  11.112 +      It also checks for character combinations, especially involving
  11.113 +      h and b, which are often confused by OCR, that rarely or never
  11.114 +      occur. For example, it queries "tbe" in a word. Now, "the" often
  11.115 +      occurs, but "tbe" is very rare (heartbeat, hotbed), so I'm
  11.116 +      playing the odds - a few false positives for many errors found.
  11.117 +      Similarly with "ii", which is a very common OCR error.
  11.118 +
  11.119 +      Gutcheck suppresses multiple reporting of the first 40 "typos"
  11.120 +      found. This is to remove the annoyance of seeing something like
  11.121 +      "FN" (footnote) or "LK" (initials) flagged as a typo 147 times
  11.122 +      in a text. 
  11.123 +
  11.124 +
  11.125 +    Line-end checking (-l switch to disable)
  11.126 +
  11.127 +      All PG texts should have a Carriage Return (CR - character 13)
  11.128 +      and a Line Feed (LF - character 10) at end of each line,
  11.129 +      regardless of what O/S you made them on. DOS/Windows, Unix
  11.130 +      and Mac have different conventions, but the final text should
  11.131 +      always use a CR/LF pair as its line terminator.
  11.132 +
  11.133 +      By default, Gutcheck verifies that every line does have
  11.134 +      the correct terminator, but if you're on a work-in-progress
  11.135 +      in Linux, you might want to convert the line-ends as a final
  11.136 +      step, and not want to see thousands of errors every time you
  11.137 +      run Gutcheck before that final step, so you can turn off 
  11.138 +      this checking with the -l switch.
  11.139 +
  11.140 +
  11.141 +    Paranoid mode (-x switch to disable: Trust No One :-)
  11.142 +
  11.143 +      -x automatically switches OFF typo-checking (the -t flag)
  11.144 +      and some extra checks, such as standalone 1 and 0 queries.
  11.145 +
  11.146 +
  11.147 +    Overview mode (-o switch)
  11.148 +
  11.149 +       This mode just gives a count of queries found
  11.150 +       instead of a detailed list.
  11.151 +
  11.152 +
  11.153 +    Header quote  (-h switch)
  11.154 +
  11.155 +       If you use the -h switch, gutcheck will also display
  11.156 +       the Title, Author, Release and Edition fields from the
  11.157 +       PG header. This is useful mostly for the automated
  11.158 +       checks we do on recently-posted texts.
  11.159 +
  11.160 +
  11.161 +    Errors to stdout (-y switch)
  11.162 +
  11.163 +       If you're just running gutcheck normally, you can ignore
  11.164 +       this. It's only there for programs that provide a front
  11.165 +       end to gutcheck. It makes error messages appear within
  11.166 +       the output of gutcheck so that the front end knows whether
  11.167 +       gutcheck ran OK.
  11.168 +
  11.169 +
  11.170 +    Verbose reporting (-v switch)
  11.171 +
  11.172 +       Normally, if gutcheck sees lots of long lines, short lines,
  11.173 +       spaced dashes, non-ASCII characters or dot-commas ".," it
  11.174 +       assumes these are features of the text, counts and summarizes
  11.175 +       them at the top of its report, but does not list them 
  11.176 +       individually. If the -v switch is on, gutcheck will list them all.
  11.177 +
  11.178 +
  11.179 +    Markup interpretation (-m switch)
  11.180 +
  11.181 +       Normally, gutcheck flags anything it suspects of being HTML
  11.182 +       markup as a possible error. When you use the -m switch,
  11.183 +       however, it matches anything that looks like markup against
  11.184 +       a short list of common HTML tags and entities. If the markup
  11.185 +       is in that list, it either ignores the markup, in the case
  11.186 +       of a tag, or "interprets" the markup as its nearest ASCII 
  11.187 +       equivalent, in the case of an entity. So, for example, using
  11.188 +       this switch, gutcheck will "see"
  11.189 +
  11.190 +       &ldquo;He went <i>thataway!</i>&rdquo;
  11.191 +
  11.192 +       as
  11.193 +
  11.194 +       "He went thataway!"
  11.195 +
  11.196 +       and report accordingly.
  11.197 +
  11.198 +       This switch does not, not, NOT check the validity of HTML;
  11.199 +       it exists so that you can run gutcheck on most HTML texts
  11.200 +       for PG, and get sane results. It does not support all tags.
  11.201 +       It does not support all entities. When it sees a tag or entity
  11.202 +       it does not recognize, it will query it as HTML just as if
  11.203 +       you hadn't specified the -m switch.
  11.204 +
  11.205 +       Gutcheck 0.99 will automatically switch on markup interpretation
  11.206 +       if it sees a lot of tags that appear to be markup, so mostly, you
  11.207 +       won't have to specify this.
  11.208 +
  11.209 +    User-defined typos (-u switch)
  11.210 +
  11.211 +        If you have a file named gutcheck.typ either in your current
  11.212 +        working directory or in the directory from which you explicitly
  11.213 +        invoked gutcheck, but not necessarily on your path, and if you
  11.214 +        specify the -u switch, gutcheck will query any word specified 
  11.215 +        in that file. The file is simple: one word, in lower case, per
  11.216 +        line. 999 lines are allowed for. Be careful not to put multiple
  11.217 +        words onto a line, or leave any rubbish other than the word on
  11.218 +        the line. You should have received a sample file gutcheck.typ
  11.219 +        with this package.
  11.220 +
  11.221 +    Ignore DP markup (-d switch)
  11.222 +        
  11.223 +        Distributed Proofreaders (http://www.pgdp.net) is currently
  11.224 +        (2005) the main source of PG texts, and proofers there use
  11.225 +        special conventions. This switch understands those conventions,
  11.226 +        so that people can use gutcheck on files in process that
  11.227 +        haven't had the special conventions removed yet. The special
  11.228 +        conventions supported in 0.99 are page-separators and
  11.229 +        "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/".
  11.230 +
  11.231 +
  11.232 +You will probably only run gutcheck on a text once or maybe twice,
  11.233 +just prior to uploading; it usually finds a few formatting problems;
  11.234 +it also usually finds queries that aren't problems at all - it often
  11.235 +questions Tables of Contents for having short lines, for example.
  11.236 +These are called "false positives", and need a human to decide on
  11.237 +them.
  11.238 +
  11.239 +The text should be standard prose, and already close to PG normal
  11.240 +format (plain text, about 70 characters per line with blank lines
  11.241 +between paragraphs).
  11.242 +
  11.243 +Gutcheck merely draws your attention to things that might be errors.
  11.244 +It is NOT a substitute for human judgement. Formatting choices like
  11.245 +short lines may be for a reason that this program can't understand.
  11.246 +
  11.247 +Even the most careful human proofing can leave errors behind in a
  11.248 +text, and there are several automated checks you can do to help find
  11.249 +them. Of these, spellchecking (with _very_ careful human judgement) is
  11.250 +the most important and most useful.
  11.251 +
  11.252 +Gutcheck does perform some basic typo-checking if you ask it to,
  11.253 +but its focus is on formatting errors specific to PG texts - 
  11.254 +mismatched quotes, non-ASCII characters, bad spacing, bad line
  11.255 +length, HTML tags perhaps left from a conversion, unbalanced
  11.256 +brackets.
  11.257 +
  11.258 +Suggestions for additional checks would be appreciated and duly 
  11.259 +considered, but no guarantees that they will be implemented.
  11.260 +
  11.261 +
  11.262 +
  11.263 +
  11.264 +                How do _I_ use it?
  11.265 +
  11.266 +Practically everyone I give gutcheck to asks me how _I_ use it.
  11.267 +Well, when I get a text for posting, say filename.txt, I run
  11.268 +
  11.269 +    gutcheck -o filename.txt
  11.270 +
  11.271 +That gives me a quick idea what I'm dealing with. It'll tell
  11.272 +me what kind of problems gutcheck sees, and give me an idea 
  11.273 +of how much more work needs to be done on the text. Keep in 
  11.274 +mind that gutcheck doesn't do anything like a full spellcheck,
  11.275 +but when I see a text that has a lot of problems, I assume that
  11.276 +it probably needs a spellcheck too.
  11.277 +
  11.278 +Having got a feel for the ballpark, I run
  11.279 +
  11.280 +    gutcheck filename.txt > jj
  11.281 +
  11.282 +where jj is my personal, all-purpose filename for temporary data
  11.283 +that doesn't need to be kept. Then I open filename.txt and jj in
  11.284 +a split-screen view in my editor, and work down the text, fixing
  11.285 +whatever needs fixing, and skipping whatever doesn't. If your 
  11.286 +editor doesn't split-screen, you can get much the same effect by 
  11.287 +opening your original file in your normal editor, and jj (or your
  11.288 +equivalent name) in something like Notepad, keeping both in view 
  11.289 +at the same time.
  11.290 +
  11.291 +Twice a day, an automatic process looks at all recently-posted
  11.292 +texts, and emails Michael, me, and sometimes other people with
  11.293 +their gutcheck summaries.
  11.294 +
  11.295 +
  11.296 +
  11.297 +        Future development of gutcheck
  11.298 +
  11.299 +Gutcheck has gone about as far as it can, given its current
  11.300 +structure. In order to add better singlequotes checking,
  11.301 +sentence checking, better he/be checking and other good stuff
  11.302 +that I'd like to see, I'll have to rewrite it from a different
  11.303 +angle - looking at the syntax instead of the lines. And I'll
  11.304 +probably get around to that sooner or later.
  11.305 +
  11.306 +Meantime, I'm just trying to get this version stabilized, so
  11.307 +please report any bugs you find. When it is stable, I'll run
  11.308 +up a Windows port for those timid souls who can't look a 
  11.309 +command line in the eye. :-)
  11.310 +
  11.311 +And I've started work on gutspell, a companion to gutcheck
  11.312 +which will concentrate on spelling problems. PG spelling
  11.313 +problems are unusual, since the range of texts we cover is
  11.314 +so wide, and I'll be taking a somewhat unorthodox approach
  11.315 +to writing this spelling-checker _specifically_ for texts
  11.316 +containing a lot of dialect and uncommon words that have
  11.317 +probably already been spell-checked against a standard
  11.318 +modern dictionary.
  11.319 +
  11.320 +
  11.321 +
  11.322 +
  11.323 +Explanations of common gutcheck messages:
  11.324 +
  11.325 +    --> 74 lines in this file have white space at end
  11.326 +
  11.327 +    PG texts shouldn't have extra white space added at end of line.
  11.328 +    Don't worry too much about this; they're not doing any harm,
  11.329 +    and they'll be removed during posting anyway.
  11.330 +
  11.331 +
  11.332 +    --> 348 lines in this file are short. Not reporting short lines.
  11.333 +    --> 84 lines in this file are long. Not reporting long lines.
  11.334 +    --> 8 lines in this file are VERY long!
  11.335 +
  11.336 +    If there are a lot of long or short lines, Gutcheck won't list
  11.337 +    them individually. The short lines version of this message
  11.338 +    is commonly seen when gutchecking poetry and some plays, where
  11.339 +    the normal line length is shorter than the standard for prose.
  11.340 +    A "VERY long" line is one over 80 characters.  You normally
  11.341 +    shouldn't have any of these, but sometimes you may have to render
  11.342 +    a table that must be that long, or some special preformatted
  11.343 +    quotation that can't be broken.
  11.344 +
  11.345 +
  11.346 +    --> There are 75 spaced dashes and em-dashes in this file. Not reporting them.
  11.347 +
  11.348 +    The PG standard for an emdash--like these--is two minus signs
  11.349 +    with no spaces before or after them. However, some older texts
  11.350 +    used spaced dashes - like these -- and if there are very many
  11.351 +    such spaced dashes in the file, gutcheck just draws your
  11.352 +    attention to it and doesn't list them individually.
  11.353 +
  11.354 +
  11.355 +
  11.356 +    Line 3020 - Non-ASCII character 233
  11.357 +
  11.358 +    Standard PG texts should use only ASCII characters with values
  11.359 +    up to 127; however, non-English, accented characters can be 
  11.360 +    represented according to several different non-ASCII encoding 
  11.361 +    schemes, using values over 127. If you have a plain English text
  11.362 +    with a few accented characters in words like cafe or tete-a-tete,
  11.363 +    you should replace the accented characters with their unaccented 
  11.364 +    versions. The English pound sign is another commonly-seen
  11.365 +    non-ASCII character. If you have enough non-ASCII characters in
  11.366 +    your text that you feel removing them would degrade your text
  11.367 +    unacceptably, you should probably consider doing an 8-bit text
  11.368 +    as well as a plain-ASCII version.
  11.369 +
  11.370 +
  11.371 +
  11.372 +    Line 1207 - Non-ISO-8859 character 156
  11.373 +
  11.374 +    Even in "8-bit" texts, there are distinctions between code sets.
  11.375 +    The ISO-8859 family of 8-bit code sets is the most commonly used
  11.376 +    in PG, and these sets do not define values in the range 128 through
  11.377 +    159 as printable characters. It's quite common for someone on a
  11.378 +    Windows or Mac machine to use a non-ISO character inadvertently,
  11.379 +    so this message warns that the character is not only not ASCII,
  11.380 +    but also outside the ISO-8859 range.
  11.381 +
  11.382 +
  11.383 +
  11.384 +    Line 46 - Tab character?
  11.385 +
  11.386 +    Some editors and WPs will put in Tab characters (character 9) to
  11.387 +    indicate indented text. You should not use these in a PG text,
  11.388 +    because you can't be sure how they will appear on a reader's
  11.389 +    screen. Find the Tab, and replace it with the appropriate number
  11.390 +    of spaces.
  11.391 +
  11.392 +
  11.393 +    Line 1327 - Tilde character?
  11.394 +
  11.395 +    The tilde character (~) might be legitimately used, but it's the
  11.396 +    character commonly used by OCR software to indicate a place where
  11.397 +    it couldn't make out the letter, so gutcheck flags it.
  11.398 +
  11.399 +
  11.400 +
  11.401 +    Line 1347 - Asterisk?
  11.402 +
  11.403 +    Asterisks are reported only in paranoid mode (see -x). 
  11.404 +    Like tildes, they are often used to indicate errors, but they are
  11.405 +    also legitimately used as line delimiters and footnote markers.
  11.406 +
  11.407 +
  11.408 +
  11.409 +    Line 1451 - Long line 129
  11.410 +
  11.411 +    PG texts should have lines shorter than 76. There may be occasions
  11.412 +    where you decide that you really have to go out to 79 characters,
  11.413 +    but the sample above says that line 1451 is 129 characters long -
  11.414 +    probably two lines run together.
  11.415 +
  11.416 +
  11.417 +
  11.418 +    Line 1590 - Short line?
  11.419 +
  11.420 +    PG texts should have lines longer than 54 characters. However,
  11.421 +    there are special cases like poetry and tables of contents where
  11.422 +    the lines _should_ be shorter. So treat Gutcheck warnings about
  11.423 +    short lines carefully. Sometimes it's a genuine formatting
  11.424 +    problem; sometimes the line really needs to be short.
  11.425 +
  11.426 +    Hint: gutcheck will not flag lines as short if they are indented
  11.427 +    - if they start with a space. I like to start inserted stanzas
  11.428 +    and other such items indented with a couple of spaces so that 
  11.429 +    they stand out from the main text anyway.
  11.430 +
  11.431 +
  11.432 +
  11.433 +    Line 1804 - Begins with punctuation?
  11.434 +
  11.435 +    Lines should normally not begin with commas, periods and so on.
  11.436 +    An exception is ellipses . . . which can happen at start of line.
  11.437 +
  11.438 +
  11.439 +
  11.440 +    Line 1850 - Spaced em-dash?
  11.441 +
  11.442 +    The PG standard for an em-dash--like these--is two minus signs
  11.443 +    with no spaces before or after them. Gutcheck flags non-PG
  11.444 +    em-dashes - like this one. Normally, you will replace it with a 
  11.445 +    PG-standard em-dash.
  11.446 +
  11.447 +
  11.448 +
  11.449 +    Line 1904 - Query he/be error?
  11.450 +
  11.451 +    Gutcheck makes a very minor effort to look for that scourge of all
  11.452 +    proofreaders, "be" replacing "he" or vice-versa, and draws your
  11.453 +    attention to it when it thinks it has found one.
  11.454 +
  11.455 +
  11.456 +
  11.457 +    Line 2017 - Query digit in a1most
  11.458 +
  11.459 +    The digit 1 is commonly OCRed for the letter l, the digit 0 for
  11.460 +    the letter O, and so on. When gutcheck sees a mix of digits and
  11.461 +    letters, it warns you. It may generate a false positive for
  11.462 +    something like 7am.
  11.463 +
  11.464 +
  11.465 +
  11.466 +    Line 2083 - Query standalone 0
  11.467 +
  11.468 +    In paranoid mode (see -x) only, gutcheck warns about the digit 0 
  11.469 +    and the number 1 standing alone as a word. This can happen if the 
  11.470 +    OCR misreads the words O or I.
  11.471 +
  11.472 +
  11.473 +
  11.474 +    Line 2115 - Query word whetber
  11.475 +
  11.476 +    If you have switched typo-checking on, gutcheck looks for
  11.477 +    potential typos, especially common h/b errors. It's not
  11.478 +    infallible; it sometimes queries legit words, but it's
  11.479 +    always worth taking a look.
  11.480 +
  11.481 +
  11.482 +
  11.483 +    Line 2190 column 14 - Missing space?
  11.484 +
  11.485 +    Omitting a space is a very common error,especially coming from
  11.486 +    OCRed text,and can be hard for a human to spot. The commas in
  11.487 +    the previous sentence illustrate the kind of thing I mean.
  11.488 +
  11.489 +
  11.490 +
  11.491 +    Line 2240 column 48 - Spaced punctuation?
  11.492 +
  11.493 +    The flip side of the "missing space" error , here , is when extra
  11.494 +    spaces are added before punctuation . Some old texts appear to add
  11.495 +    extra spaces around punctuation consistently, but this was a
  11.496 +    typographical convention rather than the author's intent, and the
  11.497 +    extra "spaces" should be removed when preparing a PG text.
  11.498 +
  11.499 +
  11.500 +
  11.501 +    Line 2301 column 19 - Unspaced quotes?
  11.502 +
  11.503 +    Another common spacing problem occurs in a phrase like "You wait
  11.504 +    there,"he said.
  11.505 +
  11.506 +
  11.507 +
  11.508 +    Line 2385 column 27 - Wrongspaced quotes?
  11.509 +
  11.510 +    As of version 0.98, gutcheck adds extra checks on whether a quote
  11.511 +    seems to be a start or end quote, and queries those that appear to
  11.512 +    be misplaced. This does give rise to false positives when quotes are
  11.513 +    nested, for example:
  11.514 +
  11.515 +    "And how," she asked, "will your "friends" help you now?"
  11.516 +
  11.517 +    but these false positives are worth it because of the many cases
  11.518 +    that this test catches, notably those like:
  11.519 +
  11.520 +    "And how, "she said," will your friends help you now?"
  11.521 +
  11.522 +    Sometimes a "wrongspaced quotes" query will arise because an earlier
  11.523 +    quote in the paragraph was omitted, so if the place specified seems
  11.524 +    to be OK, look back to see whether there's a problem in the preceding
  11.525 +    lines.
  11.526 +
  11.527 +
  11.528 +
  11.529 +    Line 2400 - HTML Tag? <PRE>
  11.530 +
  11.531 +    Some PG texts have been converted from HTML, and not all of the
  11.532 +    HTML tags have been removed.
  11.533 +
  11.534 +
  11.535 +
  11.536 +    Line 2402 - HTML symbol? &emdash;
  11.537 +
  11.538 +    Similarly, special HTML symbol characters can survive into PG
  11.539 +    texts. This check can occasionally produce amusing false positives like
  11.540 +    . . . Marwick & Co were well known for it;
  11.541 +
  11.542 +
  11.543 +
  11.544 +    Line 2540 - Mismatched quotes
  11.545 +
  11.546 +    Another gutcheck mainstay - unclosed doublequotes in a paragraph.
  11.547 +    See the discussion of quotes in the switches section near the
  11.548 +    start of this file.
  11.549 +    
  11.550 +    Since the mismatch doesn't occur on any one line, gutcheck quotes
  11.551 +    the line number of the first blank line following the paragraph,
  11.552 +    since this is the point where it reconciles the count of quotes.
  11.553 +    However, if gutcheck is echoing lines, that is, you haven't used
  11.554 +    the -e switch, it will show the _first_ line of the paragraph, 
  11.555 +    to help you find the place without using line numbers. The 
  11.556 +    offending paragraph is therefore between the quoted line and 
  11.557 +    the line number given.
  11.558 +
  11.559 +
  11.560 +
  11.561 +    Line 2587 - Mismatched single quotes
  11.562 +
  11.563 +    Only checked with the -s switch, since checking single quotes is 
  11.564 +    not a very reliable process. Otherwise, the same logic as for 
  11.565 +    doublequotes applies.
  11.566 +
  11.567 +
  11.568 +
  11.569 +    Line 2877 - Mismatched round brackets?
  11.570 +
  11.571 +    Also curly and square brackets. Texts with a lot of brackets, like
  11.572 +    plays with bracketed stage instructions, may have mismatches.
  11.573 +
  11.574 +
  11.575 +    Line 3150 - No CR?
  11.576 +    Line 3204 - Two successive CRs?
  11.577 +    Line 3281 position 75 - CR without LF?
  11.578 +
  11.579 +    These are the invalid line-end warnings. See the discussion of
  11.580 +    line-end checking in the switches section near the start of this
  11.581 +    file. If you see these, and your editor doesn't show anything
  11.582 +    wrong, you should probably try deleting the characters just before
  11.583 +    and after the line end, and the line-end itself, then retyping the
  11.584 +    characters and the line-end.
  11.585 +
  11.586 +
  11.587 +    Line 2940 - Paragraph starts with lower-case
  11.588 +
  11.589 +    A common error in an e-text is for an extra blank line
  11.590 +
  11.591 +    to be put in, like the blank line above, and this often
  11.592 +    shows up as a new paragraph beginning with lower case.
  11.593 +    Sometimes the blank line is deliberate, as when a 
  11.594 +    quotation is inserted in a speech. Use your judgement.
  11.595 +
  11.596 +
  11.597 +    Line 2987 - Extra period?
  11.598 +
  11.599 +    An extra period. is a. common problem in OCRed text. and usually
  11.600 +    arises when a speck of dust on the page is mistaken for a period.
  11.601 +    or. as occasionally happens. when a comma loses its tail.
  11.602 +
  11.603 +
  11.604 +    Line 3012 column 12 - Double punctuation?
  11.605 +
  11.606 +    Double punctuation., like that,, is a common typo and
  11.607 +    scanno. Some books have much legit double punctuation,
  11.608 +    like etc., etc., but it's worth checking anyway.
  11.609 +
  11.610 +
  11.611 +
  11.612 +            *       *       *        *
  11.613 +
  11.614 +For Windows-only users who are unfamiliar with DOS:
  11.615 +
  11.616 +    If you're a Windows-only user, you need to save
  11.617 +    gutcheck.exe into the folder (directory) where the
  11.618 +    text file you want to check is. Let's say your
  11.619 +    text file is in C:\GUT, then you should save
  11.620 +    GUTCHECK.EXE into C:\GUT.
  11.621 +
  11.622 +    Now get to a DOS prompt. You can do this by
  11.623 +    selecting the "Command Prompt" or "MS-DOS Prompt"
  11.624 +    option that will be somewhere on your
  11.625 +    Start/Programs menu.
  11.626 +
  11.627 +    Now get into the C:\GUT directory. 
  11.628 +    You can do this using the CD (change directory) 
  11.629 +    command, like this:
  11.630 +        CD \GUT
  11.631 +    and your prompt will change to 
  11.632 +        C:\GUT>
  11.633 +    so you know you're in the right place.
  11.634 +
  11.635 +    Now type
  11.636 +        gutcheck yourfile.txt
  11.637 +    and you'll see gutcheck's report.
  11.638 +
  11.639 +    By default, gutcheck prints its queries to screen.
  11.640 +    If you want to create a file of them, to edit
  11.641 +    against the text, you can use the greater-than
  11.642 +    sign (>) to tell it to output the report to a
  11.643 +    file. For example, if you want its report in a
  11.644 +    file called QUERIES.LST, you could type
  11.645 +    
  11.646 +        gutcheck yourfile.txt > queries.lst
  11.647 +
  11.648 +    The queries.lst file will then contain the listing
  11.649 +    of possible formatting errors, and you can
  11.650 +    edit it alongside your text.
  11.651 +
  11.652 +    Whatever you do, DON'T make the filename after
  11.653 +    the greater-than sign the name of a file already
  11.654 +    on your disk that you want to keep, because
  11.655 +    the greater-than sign will cause gutcheck to
  11.656 +    replace any existing file of that name.
  11.657 +
  11.658 +    So, for example, if you have two Tolstoy files
  11.659 +    that you want to check, called WARPEACE.TXT and 
  11.660 +    ANNAK.TXT, make sure that neither of these names
  11.661 +    is ever used following the greater-than sign.
  11.662 +    To check these correctly, you might do:
  11.663 +
  11.664 +    gutcheck warpeace.txt >war.lst
  11.665 +
  11.666 +    and
  11.667 +
  11.668 +    gutcheck annak.txt > annak.lst
  11.669 +
  11.670 +    separately. Then you can look at war.lst and annak.lst
  11.671 +    to see the gutcheck reports.
  11.672 +
  11.673 +            *       *       *        *
  11.674 +
  11.675 +
  11.676 +For existing 0.98 users upgrading to 0.99:
  11.677 +
  11.678 +    If you run on old 16-bit DOS or Windows 3.x, I'm afraid
  11.679 +    you're out of luck. I'm not saying it _can't_ be compiled
  11.680 +    to run on 16-bit, but the executable with the package is
  11.681 +    for Win32 only. *nix users won't notice the change at all.
  11.682 +
  11.683 +
  11.684 +    There are two new switches: -u and -d. 
  11.685 +          See above for full rundown.
  11.686 +
  11.687 +
  11.688 +Here's a list of the new errors:
  11.689 +
  11.690 +    Line 1456 - Carat character?
  11.691 +
  11.692 +    I^ve found a few.
  11.693 +
  11.694 +
  11.695 +    Line 1821 - Forward slash?
  11.696 +
  11.697 +    Common error for italicized "I", or so /'ve found.
  11.698 +
  11.699 +
  11.700 +    Line 2139 - Query missing paragraph break?
  11.701 +
  11.702 +    "Come here, son." "Do I _have_ to go, dad?"
  11.703 +    Like that. False positives in some texts. Sorry 'bout that,
  11.704 +    but these are often errors.
  11.705 +
  11.706 +
  11.707 +    Line 2200 - Query had/bad error?
  11.708 +
  11.709 +    Clear enough. Doesn't catch as many as I'd like it to,
  11.710 +    but rarely gives false alarms.
  11.711 +
  11.712 +
  11.713 +    Line 2268 - Query punctuation after the?
  11.714 +
  11.715 +    Some words, like "the", very rarely have punctuation
  11.716 +    following them. Others, like "Mrs", usually have a
  11.717 +    period, but never a comma. Occasional false positives.
  11.718 +
  11.719 +
  11.720 +    Line 2380 - Query possible scanno arid
  11.721 +
  11.722 +    It found one of your user-defined typos when you
  11.723 +    used the -u switch.
  11.724 +
  11.725 +
  11.726 +    Line 2511 - Capital "S"?
  11.727 +
  11.728 +    Surprisingly common specific case, like: Jane'S 
  11.729 +
  11.730 +    
  11.731 +    Line 3469 - endquote missing punctuation?
  11.732 +
  11.733 +    OK. This one can really cause a lot of false positives
  11.734 +    in some books, but it switches itself off if it finds
  11.735 +    more than 20 in a text, unless you force it to list them
  11.736 +    all with the -v switch.
  11.737 +    "Hey, dad" Johnny said, "can we go now?"
  11.738 +    is a common punctuation-missing error.
  11.739 +
  11.740 +
  11.741 +    Line 4266 - Mismatched underscores?
  11.742 +
  11.743 +    Like mismatched anything else!
  11.744 +
  11.745 +
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/gclib/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
    12.3 @@ -0,0 +1,10 @@
    12.4 +INCLUDES=-I$(top_srcdir)
    12.5 +AM_CFLAGS=$(GLIB_CFLAGS)
    12.6 +LIBS=$(GLIB_LIBS)
    12.7 +
    12.8 +noinst_LTLIBRARIES=libgc.la
    12.9 +libgc_la_SOURCES=gclib.h textfileutils.c textfileutils.h spawn.c spawn.h
   12.10 +if !HAVE_GLIB
   12.11 +libgc_la_SOURCES+=macros.h types.h fileutils.c fileutils.h mem.c mem.h \
   12.12 +  strfuncs.c strfuncs.h gcstring.c gcstring.h utils.c utils.h
   12.13 +endif
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/gclib/fileutils.c	Tue Jan 24 23:54:05 2012 +0000
    13.3 @@ -0,0 +1,46 @@
    13.4 +#include <stdlib.h>
    13.5 +#include <stdio.h>
    13.6 +#include <gclib/macros.h>
    13.7 +#include <gclib/mem.h>
    13.8 +#include <gclib/fileutils.h>
    13.9 +#include <gclib/gcstring.h>
   13.10 +
   13.11 +/*
   13.12 + * Read a whole file into a nul-terminated buffer stored in *contents (free
   13.13 + * it with mem_free); its size goes to *length when length is non-NULL.
   13.14 + * Returns FALSE, after a perror() message on stderr, on open/read failure.
   13.15 + */
   13.16 +boolean file_get_contents(const char *filename,char **contents,size_t *length)
   13.17 +{
   13.18 +    FILE *fp;
   13.19 +    size_t n;
   13.20 +    char *buffer;
   13.21 +    String *string;
   13.22 +    fp=fopen(filename,"rb");
   13.23 +    if (!fp)
   13.24 +    {
   13.25 +	perror(filename);
   13.26 +	return FALSE;
   13.27 +    }
   13.28 +    buffer=mem_new(char,1024);
   13.29 +    string=string_new(NULL);
   13.30 +    do
   13.31 +    {
   13.32 +	n=fread(buffer,1,1024,fp);
   13.33 +	if (ferror(fp))	/* fread() returns size_t, so errors only show via ferror() */
   13.34 +	{
   13.35 +	    perror(filename);
   13.36 +	    string_free(string,TRUE);
   13.37 +	    mem_free(buffer);
   13.38 +	    fclose(fp);	/* a FILE * is released with fclose(), not free() */
   13.39 +	    return FALSE;
   13.40 +	}
   13.41 +	string_append_len(string,buffer,n);
   13.42 +    } while(n);	/* a zero-byte read marks end of file */
   13.43 +    mem_free(buffer);
   13.44 +    if (length)
   13.45 +	*length=string->len;
   13.46 +    *contents=string_free(string,FALSE);	/* hand the text over to the caller */
   13.47 +    fclose(fp);
   13.48 +    return TRUE;
   13.49 +}
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/gclib/fileutils.h	Tue Jan 24 23:54:05 2012 +0000
    14.3 @@ -0,0 +1,8 @@
    14.4 +#ifndef GC_FILEUTILS_H
    14.5 +#define GC_FILEUTILS_H
    14.6 +
    14.7 +#include <gclib/types.h>
    14.8 +/* Read filename into *contents; *length is set when length is non-NULL. */
    14.9 +boolean file_get_contents(const char *filename,char **contents,size_t *length);
   14.10 +
   14.11 +#endif /* GC_FILEUTILS_H */
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/gclib/gclib.h	Tue Jan 24 23:54:05 2012 +0000
    15.3 @@ -0,0 +1,36 @@
    15.4 +#if HAVE_GLIB	/* GLib available: map the gclib API straight onto GLib */
    15.5 +/* NOTE(review): mem_new/mem_renew get no alias in this branch - confirm that is intended. */
    15.6 +#include <glib.h>
    15.7 +#define GC_DIR_SEPARATOR G_DIR_SEPARATOR
    15.8 +#define GC_DIR_SEPARATOR_S G_DIR_SEPARATOR_S
    15.9 +#define GC_IS_DIR_SEPARATOR(c) G_IS_DIR_SEPARATOR(c)
   15.10 +#define boolean gboolean
   15.11 +#define String GString
   15.12 +#define mem_new0 g_new0
   15.13 +#define mem_free g_free
   15.14 +#define str_dup g_strdup
   15.15 +#define str_ndup g_strndup
   15.16 +#define path_get_basename g_path_get_basename
   15.17 +#define file_get_contents(filename,contents,length) \
   15.18 +  g_file_get_contents(filename,contents,length,NULL)
   15.19 +#define string_new g_string_new
   15.20 +#define string_append g_string_append
   15.21 +#define string_append_len g_string_append_len
   15.22 +#define string_append_c g_string_append_c
   15.23 +#define string_free g_string_free
   15.24 +#define string_set_size g_string_set_size
   15.25 +
   15.26 +#else	/* !HAVE_GLIB */
   15.27 +/* No GLib: use the declarations from gclib's own headers instead. */
   15.28 +#include <gclib/macros.h>
   15.29 +#include <gclib/types.h>
   15.30 +#include <gclib/mem.h>
   15.31 +#include <gclib/fileutils.h>
   15.32 +#include <gclib/strfuncs.h>
   15.33 +#include <gclib/gcstring.h>
   15.34 +#include <gclib/utils.h>
   15.35 +
   15.36 +#endif	/* HAVE_GLIB */
   15.37 +
   15.38 +#include <gclib/textfileutils.h>
   15.39 +#include <gclib/spawn.h>
    16.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    16.2 +++ b/gclib/gcstring.c	Tue Jan 24 23:54:05 2012 +0000
    16.3 @@ -0,0 +1,90 @@
    16.4 +#include <stdlib.h>
    16.5 +#include <string.h>
    16.6 +#include <gclib/gcstring.h>
    16.7 +#include <gclib/types.h>
    16.8 +#include <gclib/mem.h>
    16.9 +#include <gclib/strfuncs.h>
   16.10 +
   16.11 +/*
   16.12 + * Strings which manage their own memory
   16.13 + */
   16.14 +
   16.15 +String *string_new(const char *init)
   16.16 +{
   16.17 +    const char *text=init?init:"";	/* NULL means the empty string */
   16.18 +    String *result=mem_new(String,1);
   16.19 +    result->str=str_dup(text);
   16.20 +    result->len=strlen(text);
   16.21 +    /* Room for the text plus its terminating nul. */
   16.22 +    result->alloc=result->len+1;
   16.23 +    return result;
   16.24 +}
   16.25 +
   16.26 +/*
   16.27 + * Free a string and either return the contents (if free_segment is FALSE)
   16.28 + * or free the contents as well and return NULL (if free_segment is TRUE).
   16.29 + */
   16.30 +char *string_free(String *string,boolean free_segment)
   16.31 +{
   16.32 +    /* Grab the character data before the header is released. */
   16.33 +    char *segment=string->str;
   16.34 +    mem_free(string);
   16.35 +    if (!free_segment)
   16.36 +	return segment;	/* caller now owns the character data */
   16.37 +    /*
   16.38 +     * The caller does not want the contents, so release them too.
   16.39 +     */
   16.40 +    mem_free(segment);
   16.41 +    return NULL;
   16.42 +}
   16.43 +
   16.44 +/*
   16.45 + * Append a byte to string.
   16.46 + */
   16.47 +void string_append_c(String *string,char c)
   16.48 +{
   16.49 +    if (string->alloc==string->len+1)	/* only the nul slot is left */
   16.50 +    {
   16.51 +	string->alloc+=string->alloc;
   16.52 +	string->str=mem_renew(char,string->str,string->alloc);
   16.53 +    }
   16.54 +    string->str[string->len]=c;
   16.55 +    string->str[++string->len]='\0';
   16.56 +}
   16.57 +
   16.58 +/*
   16.59 + * Append len bytes from s to string. len may be passed as <0 if s is
   16.60 + * a nul-terminated string of unknown length.
   16.61 + */
   16.62 +void string_append_len(String *string,const char *s,ssize_t len)
   16.63 +{
   16.64 +    /* A negative len means s is nul-terminated and must be measured. */
   16.65 +    size_t count=len<0?strlen(s):(size_t)len;
   16.66 +    size_t total=string->len+count;
   16.67 +    if (total>=string->alloc)
   16.68 +    {
   16.69 +	do string->alloc*=2; while (total>=string->alloc);
   16.70 +	string->str=mem_renew(char,string->str,string->alloc);
   16.71 +    }
   16.72 +    memcpy(string->str+string->len,s,count);
   16.73 +    string->len=total;
   16.74 +    string->str[total]='\0';
   16.75 +}
   16.76 +
   16.77 +/*
   16.78 + * Set the length of a String to len, truncating it or extending it with
   16.79 + * undefined bytes as needed; string->str[len] is always made a nul byte.
   16.80 + */
   16.81 +void string_set_size(String *string,size_t len)
   16.82 +{
   16.83 +    size_t capacity=string->alloc;
   16.84 +    while (len>=capacity)
   16.85 +	capacity*=2;
   16.86 +    if (capacity!=string->alloc)
   16.87 +    {
   16.88 +	string->alloc=capacity;
   16.89 +	string->str=mem_renew(char,string->str,capacity);
   16.90 +    }
   16.91 +    string->str[len]='\0';
   16.92 +    string->len=len;
   16.93 +}
    17.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    17.2 +++ b/gclib/gcstring.h	Tue Jan 24 23:54:05 2012 +0000
    17.3 @@ -0,0 +1,24 @@
    17.4 +#ifndef GC_STRING_H
    17.5 +#define GC_STRING_H
    17.6 +
    17.7 +#include <unistd.h>
    17.8 +#include <gclib/types.h>
    17.9 +
   17.10 +/*
   17.11 + * A growable byte string. str holds len bytes followed by a nul byte in
   17.12 + * a buffer of alloc bytes.
   17.13 + */
   17.14 +typedef struct {
   17.15 +    char *str;
   17.16 +    size_t alloc,len;
   17.17 +} String;
   17.18 +
   17.19 +String *string_new(const char *init);
   17.20 +char *string_free(String *string,boolean free_segment);
   17.21 +void string_append_c(String *string,char c);
   17.22 +void string_append_len(String *string,const char *s,ssize_t len);
   17.23 +/* Declared so that callers such as spawn_sync() see a prototype. */
   17.24 +void string_set_size(String *string,size_t len);
   17.25 +#define string_append(string,s)		string_append_len(string,s,-1)
   17.26 +
   17.27 +#endif /* GC_STRING_H */
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/gclib/macros.h	Tue Jan 24 23:54:05 2012 +0000
    18.3 @@ -0,0 +1,7 @@
    18.4 +#ifndef FALSE
    18.5 +#define FALSE	0
    18.6 +#endif /* FALSE */
    18.7 +
    18.8 +#ifndef TRUE
    18.9 +#define TRUE	(!FALSE)	/* evaluates to 1 */
   18.10 +#endif /* TRUE */
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/gclib/mem.c	Tue Jan 24 23:54:05 2012 +0000
    19.3 @@ -0,0 +1,54 @@
    19.4 +#include <stdlib.h>
    19.5 +#include <stdio.h>
    19.6 +#include <string.h>
    19.7 +#include <gclib/mem.h>
    19.8 +
    19.9 +/*
   19.10 + * A memory allocator that aborts on failure (so that the caller never
   19.11 + * needs to handle out of memory). A request whose total size does not
   19.12 + * fit in a size_t is treated as a failure instead of being truncated.
   19.13 + */
   19.14 +void *mem_alloc(size_t nmemb,size_t size)
   19.15 +{
   19.16 +    void *ptr=(size && nmemb>(size_t)-1/size)?NULL:malloc(nmemb*size);
   19.17 +    if (!ptr)
   19.18 +    {
   19.19 +	fprintf(stderr,
   19.20 +	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
   19.21 +	  (unsigned long)nmemb,(unsigned long)size);
   19.22 +	abort();
   19.23 +    }
   19.24 +    return ptr;
   19.25 +}
   19.26 +
   19.27 +/*
   19.28 + * As mem_alloc, but the new memory is cleared to zero. Failure to
   19.29 + * allocate is reported on stderr and aborts the program, so NULL is
   19.30 + * never returned.
   19.31 + */
   19.32 +void *mem_alloc0(size_t nmemb,size_t size)
   19.33 +{
   19.34 +    void *block=calloc(nmemb,size);
   19.35 +    if (block)
   19.36 +	return block;
   19.37 +    fprintf(stderr,
   19.38 +      "Not enough memory to allocate %lu elements of %lu bytes.\n",
   19.39 +      (unsigned long)nmemb,(unsigned long)size);
   19.40 +    abort();
   19.41 +}
   19.42 +
   19.43 +/*
   19.44 + * Grow or shrink a memory block, aborting on failure or size overflow.
   19.45 + */
   19.46 +void *mem_realloc(void *ptr,size_t nmemb,size_t size)
   19.47 +{
   19.48 +    ptr=(size && nmemb>(size_t)-1/size)?NULL:realloc(ptr,nmemb*size);
   19.49 +    if (!ptr)
   19.50 +    {
   19.51 +	fprintf(stderr,
   19.52 +	  "Not enough memory to allocate %lu elements of %lu bytes.\n",
   19.53 +	  (unsigned long)nmemb,(unsigned long)size);
   19.54 +	abort();
   19.55 +    }
   19.56 +    return ptr;
   19.57 +}
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/gclib/mem.h	Tue Jan 24 23:54:05 2012 +0000
    20.3 @@ -0,0 +1,16 @@
    20.4 +#ifndef GC_MEM_H
    20.5 +#define GC_MEM_H
    20.6 +
    20.7 +#include <stdlib.h>	/* size_t, and free() for mem_free */
    20.8 +
    20.9 +void *mem_alloc(size_t nmemb,size_t size);
   20.10 +void *mem_alloc0(size_t nmemb,size_t size);
   20.11 +void *mem_realloc(void *ptr,size_t nmemb,size_t size);
   20.12 +
   20.13 +/* Typed wrappers around the above for arrays of n objects of type. */
   20.14 +#define mem_new(type,n)		((type *)mem_alloc(n,sizeof(type)))
   20.15 +#define mem_new0(type,n)	((type *)mem_alloc0(n,sizeof(type)))
   20.16 +#define mem_renew(type,ptr,n)	((type *)mem_realloc(ptr,n,sizeof(type)))
   20.17 +#define mem_free(ptr)		free(ptr)
   20.18 +
   20.19 +#endif /* GC_MEM_H */
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/gclib/spawn.c	Tue Jan 24 23:54:05 2012 +0000
    21.3 @@ -0,0 +1,84 @@
    21.4 +#include <stdlib.h>
    21.5 +#include <stdio.h>
    21.6 +#ifndef WIN32
    21.7 +#include <sys/wait.h>
    21.8 +#endif
    21.9 +#include <gclib/gclib.h>
   21.10 +
   21.11 +#define SPAWN_BUFSIZE	128
   21.12 +
   21.13 +/*
   21.14 + * Run the command argv and wait for it to exit, optionally capturing its
   21.15 + * stdout (newly allocated) and status. Returns FALSE if that fails.
   21.16 + */
   21.17 +boolean spawn_sync(char **argv,char **standard_output,int *exit_status)
   21.18 +{
   21.19 +/* Don't use g_spawn_sync on WIN32 for now to avoid needing the helper */
   21.20 +#if HAVE_GLIB && !defined(WIN32)
   21.21 +    char *standard_error=NULL;
   21.22 +    GError *error=NULL;
   21.23 +    gboolean retval;
   21.24 +    GSpawnFlags flags=G_SPAWN_SEARCH_PATH;
   21.25 +    if (!standard_output)
   21.26 +	flags|=G_SPAWN_STDOUT_TO_DEV_NULL;	/* keep SEARCH_PATH set */
   21.27 +    retval=g_spawn_sync(NULL,argv,NULL,flags,NULL,NULL,standard_output,
   21.28 +      &standard_error,exit_status,&error);
   21.29 +    if (standard_error)		/* not set when the spawn itself fails */
   21.30 +	fputs(standard_error,stderr);
   21.31 +    g_free(standard_error);
   21.32 +    if (!retval)
   21.33 +    {
   21.34 +	fprintf(stderr,"%s\n",error->message);
   21.35 +	g_error_free(error);
   21.36 +    }
   21.37 +    else if (exit_status)
   21.38 +	*exit_status=WEXITSTATUS(*exit_status);
   21.39 +    return retval;
   21.40 +#else
   21.41 +    FILE *fp;
   21.42 +    int i,r;
   21.43 +    size_t n,len;
   21.44 +    String *command_line,*string;
   21.45 +    command_line=string_new(NULL);
   21.46 +    for(i=0;argv[i];i++)
   21.47 +    {
   21.48 +	if (i)
   21.49 +	    string_append_c(command_line,' ');
   21.50 +	string_append(command_line,argv[i]);	/* NOTE: not shell-quoted */
   21.51 +    }
   21.52 +    fp=popen(command_line->str,"r");
   21.53 +    if (!fp)
   21.54 +	perror(command_line->str);	/* report before the string is freed */
   21.55 +    string_free(command_line,TRUE);
   21.56 +    if (!fp)
   21.57 +	return FALSE;
   21.58 +    string=string_new(NULL);
   21.59 +    do
   21.60 +    {
   21.61 +	len=string->len;
   21.62 +	string_set_size(string,len+SPAWN_BUFSIZE);
   21.63 +	n=fread(string->str+len,1,SPAWN_BUFSIZE,fp);
   21.64 +	if (ferror(fp))	/* fread returns size_t, so it can't signal <0 */
   21.65 +	{
   21.66 +	    perror("fread");
   21.67 +	    (void)pclose(fp);
   21.68 +	    string_free(string,TRUE);
   21.69 +	    return FALSE;
   21.70 +	}
   21.71 +	string_set_size(string,len+n);
   21.72 +    } while(n);
   21.73 +    if ((r=pclose(fp))<0)
   21.74 +    {
   21.75 +	perror("pclose");
   21.76 +	string_free(string,TRUE);
   21.77 +	return FALSE;
   21.78 +    }
   21.79 +    if (exit_status)
   21.80 +	*exit_status=r;	/* raw pclose() result, not WEXITSTATUS'd */
   21.81 +    if (standard_output)
   21.82 +	*standard_output=string_free(string,FALSE);
   21.83 +    else
   21.84 +	string_free(string,TRUE);
   21.85 +    return TRUE;
   21.86 +#endif
   21.87 +}
    22.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    22.2 +++ b/gclib/spawn.h	Tue Jan 24 23:54:05 2012 +0000
    22.3 @@ -0,0 +1,8 @@
    22.4 +#ifndef GC_SPAWN_H
    22.5 +#define GC_SPAWN_H
    22.6 +
    22.7 +#include <gclib/gclib.h>
    22.8 +
    22.9 +boolean spawn_sync(char **argv,char **standard_output,int *exit_status);
   22.10 +
   22.11 +#endif /* GC_SPAWN_H */
    23.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    23.2 +++ b/gclib/strfuncs.c	Tue Jan 24 23:54:05 2012 +0000
    23.3 @@ -0,0 +1,26 @@
    23.4 +#include <stdlib.h>
    23.5 +#include <string.h>
    23.6 +#include <gclib/mem.h>
    23.7 +#include <gclib/strfuncs.h>
    23.8 +
    23.9 +/*
   23.10 + * Duplicate up to n bytes (not characters) of str; NULL iff str is NULL.
   23.11 + */
   23.12 +char *str_ndup(const char *str,size_t n)
   23.13 +{
   23.14 +    char *copy=NULL;
   23.15 +    if (str)
   23.16 +    {
   23.17 +	copy=mem_alloc0(n+1,1);
   23.18 +	strncpy(copy,str,n);
   23.19 +    }
   23.20 +    return copy;
   23.21 +}
   23.22 +
   23.23 +/*
   23.24 + * Like strdup, but only returns NULL if str is NULL.
   23.25 + */
   23.26 +char *str_dup(const char *str)
   23.27 +{
   23.28 +    return str?str_ndup(str,strlen(str)):NULL;
   23.29 +}
    24.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    24.2 +++ b/gclib/strfuncs.h	Tue Jan 24 23:54:05 2012 +0000
    24.3 @@ -0,0 +1,7 @@
    24.4 +#ifndef GC_STRFUNCS_H
    24.5 +#define GC_STRFUNCS_H
    24.6 +
    24.7 +char *str_dup(const char *str);
    24.8 +char *str_ndup(const char *str,size_t n);
    24.9 +
   24.10 +#endif /* GC_STRFUNCS_H */
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/gclib/textfileutils.c	Tue Jan 24 23:54:05 2012 +0000
    25.3 @@ -0,0 +1,33 @@
    25.4 +#include <stdlib.h>
    25.5 +#include <stdio.h>
    25.6 +#include <gclib/gclib.h>
    25.7 +
    25.8 +/*
    25.9 + * Read a text file into memory. On success returns TRUE and, where the
   25.10 + * pointers are non-NULL, stores the text in *contents (to be freed with
   25.11 + * mem_free when no longer required) and its size in *length. Returns
   25.12 + * FALSE if file_get_contents fails (it should already have reported the
   25.13 + * error). Every '\r' is dropped, so DOS-style line endings just work.
   25.14 + */
   25.15 +boolean file_get_contents_text(const char *filename,char **contents,
   25.16 +  size_t *length)
   25.17 +{
   25.18 +    size_t i;	/* same type as raw_length so large files don't overflow */
   25.19 +    char *raw;
   25.20 +    size_t raw_length;
   25.21 +    String *string;
   25.22 +    if (!file_get_contents(filename,&raw,&raw_length))
   25.23 +	return FALSE;
   25.24 +    string=string_new(NULL);
   25.25 +    for(i=0;i<raw_length;i++)
   25.26 +	if (raw[i]!='\r')
   25.27 +	    string_append_c(string,raw[i]);
   25.28 +    mem_free(raw);
   25.29 +    if (length)
   25.30 +	*length=string->len;
   25.31 +    if (contents)
   25.32 +	*contents=string_free(string,FALSE);
   25.33 +    else
   25.34 +	string_free(string,TRUE);
   25.35 +    return TRUE;
   25.36 +}
    26.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.2 +++ b/gclib/textfileutils.h	Tue Jan 24 23:54:05 2012 +0000
    26.3 @@ -0,0 +1,9 @@
    26.4 +#ifndef GC_TEXTFILEUTILS_H
    26.5 +#define GC_TEXTFILEUTILS_H
    26.6 +
    26.7 +#include <gclib/gclib.h>
    26.8 +
    26.9 +boolean file_get_contents_text(const char *filename,char **contents,
   26.10 +  size_t *length);
   26.11 +
   26.12 +#endif /* GC_TEXTFILEUTILS_H */
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/gclib/types.h	Tue Jan 24 23:54:05 2012 +0000
    27.3 @@ -0,0 +1,6 @@
    27.4 +#ifndef GC_TYPES_H
    27.5 +#define GC_TYPES_H
    27.6 +
    27.7 +typedef int boolean;	/* used with TRUE and FALSE */
    27.8 +
    27.9 +#endif	/* GC_TYPES_H */
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/gclib/utils.c	Tue Jan 24 23:54:05 2012 +0000
    28.3 @@ -0,0 +1,46 @@
    28.4 +#include <stdlib.h>
    28.5 +#include <string.h>
    28.6 +#include <unistd.h>
    28.7 +#include <gclib/mem.h>
    28.8 +#include <gclib/strfuncs.h>
    28.9 +#include <gclib/utils.h>
   28.10 +
   28.11 +#define is_valid_drive(d)	(((d)>='a' && (d)<='z') || ((d)>='A' && (d)<='Z'))
   28.12 +
   28.13 +/*
   28.14 + * Return a newly allocated copy of the final component of filename,
   28.15 + * ignoring any trailing directory separators. A filename made up solely
   28.16 + * of separators (on Windows, optionally after a drive letter) gives a
   28.17 + * single separator, and an empty filename gives ".".
   28.18 + */
   28.19 +char *path_get_basename(const char *filename)
   28.20 +{
   28.21 +    size_t end,start;
   28.22 +    char *name;
   28.23 +    end=strlen(filename);
   28.24 +    if (!end)
   28.25 +	return str_dup(".");
   28.26 +    /* Drop trailing separators; end is one past the last byte kept. */
   28.27 +    while (end>0 && GC_IS_DIR_SEPARATOR(filename[end-1]))
   28.28 +	end--;
   28.29 +    if (!end)
   28.30 +	return str_dup(GC_DIR_SEPARATOR_S);	/* nothing but separators */
   28.31 +#ifdef WIN32
   28.32 +    /* A bare drive such as "c:" or "c:\" also yields a lone separator. */
   28.33 +    if (end==2 && is_valid_drive(filename[0]) && filename[1]==':')
   28.34 +	return str_dup(GC_DIR_SEPARATOR_S);
   28.35 +#endif
   28.36 +    /* Walk back to just after the previous separator (or the start). */
   28.37 +    for(start=end;start>0;start--)
   28.38 +	if (GC_IS_DIR_SEPARATOR(filename[start-1]))
   28.39 +	    break;
   28.40 +#ifdef WIN32
   28.41 +    /* No separator found: skip over a leading drive specification. */
   28.42 +    if (start==0 && is_valid_drive(filename[0]) && filename[1]==':')
   28.43 +	start=2;
   28.44 +#endif
   28.45 +    name=mem_alloc(end-start+1,1);
   28.46 +    memcpy(name,filename+start,end-start);
   28.47 +    name[end-start]='\0';
   28.48 +    return name;
   28.49 +}
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/gclib/utils.h	Tue Jan 24 23:54:05 2012 +0000
    29.3 @@ -0,0 +1,16 @@
    29.4 +#ifndef GC_UTIL_H
    29.5 +#define GC_UTIL_H
    29.6 +
    29.7 +#ifdef WIN32
    29.8 +#define GC_DIR_SEPARATOR '\\'
    29.9 +#define GC_DIR_SEPARATOR_S "\\"
   29.10 +#define GC_IS_DIR_SEPARATOR(c) ((c)==GC_DIR_SEPARATOR || (c)=='/')
   29.11 +#else
   29.12 +#define GC_DIR_SEPARATOR '/'
   29.13 +#define GC_DIR_SEPARATOR_S "/"
   29.14 +#define GC_IS_DIR_SEPARATOR(c) ((c)==GC_DIR_SEPARATOR)
   29.15 +#endif
   29.16 +
   29.17 +char *path_get_basename(const char *filename);
   29.18 +
   29.19 +#endif /* GC_UTIL_H */
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/gutcheck/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
    30.3 @@ -0,0 +1,8 @@
    30.4 +bin_PROGRAMS=gutcheck
    30.5 +pkgdata_DATA=gutcheck.typ
    30.6 +
    30.7 +gutcheck.typ:	gutcheck.typ.in
    30.8 +	sed 's/$$/\r/' $< > $@
    30.9 +
   30.10 +EXTRA_DIST=gutcheck.typ.in
   30.11 +CLEANFILES=gutcheck.typ
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/gutcheck/gutcheck.c	Tue Jan 24 23:54:05 2012 +0000
    31.3 @@ -0,0 +1,2982 @@
    31.4 +/*************************************************************************/
    31.5 +/* gutcheck - check for assorted weirdnesses in a PG candidate text file */
    31.6 +/*                                                                       */
    31.7 +/* Version 0.991                                                         */
    31.8 +/* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>                  */
    31.9 +/*                                                                       */
   31.10 +/* This program is free software; you can redistribute it and/or modify  */
   31.11 +/* it under the terms of the GNU General Public License as published by  */
   31.12 +/* the Free Software Foundation; either version 2 of the License, or     */
   31.13 +/* (at your option) any later version.                                   */
   31.14 +/*                                                                       */
   31.15 +/* This program is distributed in the hope that it will be useful,       */
   31.16 +/* but WITHOUT ANY WARRANTY; without even the implied warranty of        */
   31.17 +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         */
   31.18 +/* GNU General Public License for more details.                          */
   31.19 +/*                                                                       */
   31.20 +/* You should have received a copy of the GNU General Public License     */
   31.21 +/* along with this program; if not, write to the                         */
   31.22 +/*      Free Software Foundation, Inc.,                                  */
   31.23 +/*      59 Temple Place,                                                 */
   31.24 +/*      Suite 330,                                                       */
   31.25 +/*      Boston, MA  02111-1307  USA                                      */
   31.26 +/*                                                                       */
   31.27 +/*                                                                       */
   31.28 +/*                                                                       */
   31.29 +/* Overview comments:                                                    */
   31.30 +/*                                                                       */
   31.31 +/* If you're reading this, you're either interested in how to detect     */
   31.32 +/* formatting errors, or very very bored.                                */
   31.33 +/*                                                                       */
   31.34 +/* Gutcheck is a homebrew formatting checker specifically for            */
   31.35 +/* spotting common formatting problems in a PG e-text. I typically       */
   31.36 +/* run it once or twice on a file I'm about to submit; it usually        */
   31.37 +/* finds a few formatting problems. It also usually finds lots of        */
   31.38 +/* queries that aren't problems at all; it _really_ doesn't like         */
   31.39 +/* the standard PG header, for example.  It's optimized for straight     */
   31.40 +/* prose; poetry and non-fiction involving tables tend to trigger        */
   31.41 +/* false alarms.                                                         */
   31.42 +/*                                                                       */
   31.43 +/* The code of gutcheck is not very interesting, but the experience      */
   31.44 +/* of what constitutes a possible error may be, and the best way to      */
   31.45 +/* illustrate that is by example.                                        */
   31.46 +/*                                                                       */
   31.47 +/*                                                                       */
   31.48 +/* Here are some common typos found in PG texts that gutcheck            */
   31.49 +/* will flag as errors:                                                  */
   31.50 +/*                                                                       */
   31.51 +/* "Look!John , over there!"                                             */
   31.52 +/* <this is a HTML tag>                                                  */
   31.53 +/* &so is this;                                                          */
   31.54 +/* Margaret said: " Now you should start for school."                    */
   31.55 +/* Margaret said: "Now you should start for school. (if end of para)     */
   31.56 +/* The horse is said to he worth a lot.                                  */
   31.57 +/* 0K - this'11 make you look close1y.                                   */
   31.58 +/* "If you do. you'll regret it!"                                        */
   31.59 +/*                                                                       */
   31.60 +/* There are some complications . The extra space left around that       */
   31.61 +/* period was an error . . . but that ellipsis wasn't.                   */
   31.62 +/*                                                                       */
   31.63 +/* The last line of a paragraph                                          */
   31.64 +/* is usually short.                                                     */
   31.65 +/*                                                                       */
   31.66 +/* This period is an error.But the periods in a.m. aren't.               */
   31.67 +/*                                                                       */
   31.68 +/* Checks that are do-able but not (well) implemented are:               */
   31.69 +/*        Single-quote checking.                                         */
   31.70 +/*          Despite 3 attempts at it, singlequote checking is still      */
   31.71 +/*          crap in gutcheck. It may not be possible without analysis    */
   31.72 +/*          of the whole paragraph.                                      */
   31.73 +/*                                                                       */
   31.74 +/*************************************************************************/
   31.75 +
   31.76 +
   31.77 +#include <stdio.h>
   31.78 +#include <stdlib.h>
   31.79 +#include <string.h>
   31.80 +#include <ctype.h>
   31.81 +
   31.82 +#define MAXWORDLEN    80    /* max length of one word             */
   31.83 +#define LINEBUFSIZE 2048    /* buffer size for an input line      */
   31.84 +
   31.85 +#define MAX_USER_TYPOS 1000
   31.86 +#define USERTYPO_FILE "gutcheck.typ"
   31.87 +
   31.88 +#ifndef MAX_PATH
   31.89 +#define MAX_PATH 16384
   31.90 +#endif
   31.91 +
   31.92 +char aline[LINEBUFSIZE];
   31.93 +char prevline[LINEBUFSIZE];
   31.94 +
   31.95 +                 /* Common typos. */
   31.96 +char *typo[] = { "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane", "nad",
   31.97 +                "te", "ig", "acn",  "ahve", "alot", "anbd", "andt", "awya", "aywa", "bakc", "om",
   31.98 +                "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt", "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr",
   31.99 +                "hmi", "hse", "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd", "gerat", "goign",
  31.100 +                "gruop", "haev", "hda", "hearign", "seeign", "sayign", "herat", "hge", "hsa", "hsi", "hte", "htere",
  31.101 +                "htese", "htey", "htis", "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut", "loev",
  31.102 +                "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter", "omre", "onyl", "otehr", "otu", "owrk",
  31.103 +                "owuld", "peice", "peices", "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",
  31.104 +                "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe", "sohw", "stnad", "stopry",
  31.105 +                "stoyr", "stpo", "tahn", "taht", "tath", "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge",
  31.106 +                "thier", "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne", "tirne", "tkae",
  31.107 +                "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey", "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih",
  31.108 +                "whihc", "whta", "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",
  31.109 +                "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking", "wtih", "wuould", "wya", "yera",
  31.110 +                "yeras", "yersa", "yoiu", "youve", "ytou", "yuor",
  31.111 +                /* added h/b words for version 12 - removed a few with "tbe" v.25 */
  31.112 +                "abead", "ahle", "ahout", "ahove", "altbough", "balf", "bardly", "bas", "bave", "baving", "bebind", 
  31.113 +                "beld", "belp", "belped", "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge", "dehates", 
  31.114 +                "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan", "hegin", "heing", 
  31.115 +                "helieve", "henefit", "hetter", "hetween", "heyond", "hig", "higber", "huild", "huy", "hy", "jobn", "joh", 
  31.116 +                "meanwbile", "memher", "memhers", "numher", "numhers", 
  31.117 +                "perbaps", "prohlem", "puhlic", "witbout", 
  31.118 +                /* and a few more for .18 */
  31.119 +                "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud", "prornise", "prornised", "modem", "bo",
  31.120 +                "heside", "chapteb", "chaptee", "se",
  31.121 +                 ""};
  31.122 +
  31.123 +char *usertypo[MAX_USER_TYPOS];
  31.124 +
  31.125 +                 /* Common abbreviations and other OK words not to query as typos. */
  31.126 +                 /* 0.99 last-minute - removed "ms"      */
  31.127 +char *okword[] = {"mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm", "rd", "sh", "br",
  31.128 +                  "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "pompeii","hawaii","hawaiian",
  31.129 +                  "hotbed", "heartbeat", "heartbeats", "outbid", "outbids", "frostbite", "frostbitten",
  31.130 +                  ""};
  31.131 +
  31.132 +                 /* Common abbreviations that cause otherwise unexplained periods. */
  31.133 +char *abbrev[] = {"cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op", "cit",
  31.134 +                  "deg", "min", "chap", "oz", "mme", "mlle", "mssrs",
  31.135 +                  ""};
  31.136 +                 /* Two-Letter combinations that rarely if ever start words, */
  31.137 +                 /* but are common scannos or otherwise common letter        */
  31.138 +                 /* combinations.                                            */
  31.139 +char *nostart[] = { "hr", "hl", "cb", "sb", "tb", "wb", "tl",
  31.140 +                    "tn", "rn", "lt", "tj",
  31.141 +                    "" };
  31.142 +
  31.143 +                 /* Two-Letter combinations that rarely if ever end words    */
  31.144 +                 /* but are common scannos or otherwise common letter        */
  31.145 +                 /* combinations                                             */
  31.146 +char *noend[]   = { "cb", "gb", "pb", "sb", "tb", 
  31.147 +                    "wh","fr","br","qu","tw","gl","fl","sw","gr","sl","cl",
  31.148 +                    "iy",
  31.149 +                    ""};
  31.150 +
  31.151 +char *markup[]  = { "a", "b", "big", "blockquote", "body", "br", "center", 
  31.152 +                    "col", "div", "em", "font", "h1", "h2", "h3", "h4", 
  31.153 +                    "h5", "h6", "head", "hr", "html", "i", "img", "li", 
  31.154 +                    "meta", "ol", "p", "pre", "small", "span", "strong", 
  31.155 +                    "sub", "sup", "table", "td", "tfoot", "thead", "title", 
  31.156 +                    "tr", "tt", "u", "ul", 
  31.157 +                    ""};
  31.158 +
  31.159 +char *DPmarkup[] = { "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>",
  31.160 +                    ""}; /* <tb> added .991 */
  31.161 +
  31.162 +char *nocomma[]  = { "the", "it's", "their", "an", "mrs", "a", "our", "that's",
  31.163 +                     "its", "whose", "every", "i'll", "your", "my", 
  31.164 +                     "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "rd", 
  31.165 +                     "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", 
  31.166 +                     "i'm", "during", "let", "toward", "among",
  31.167 +                     ""};
  31.168 +
  31.169 +
  31.170 +char *noperiod[] = { "every", "i'm", "during", "that's", "their", "your", "our", "my", "or", 
  31.171 +                     "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether", 
  31.172 +                     "i'll", "whose", "who", "because", "when", "let", "till", "very",
  31.173 +                     "an", "among", "those", "into", "whom", "having", "thence",
  31.174 +                     ""}; 
  31.175 +
  31.176 +
  31.177 +char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü";  /* Carlo's old suggestion, updated .991 */
  31.178 +
  31.179 +struct {
  31.180 +    char *htmlent;
  31.181 +    char *htmlnum;
  31.182 +    char *textent;
  31.183 +    } entities[] = { "&amp;",           "&#38;",        "&", 
  31.184 +                     "&lt;",            "&#60;",        "<",
  31.185 +                     "&gt;",            "&#62;",        ">",
  31.186 +                     "&deg;",           "&#176;",       " degrees",
  31.187 +                     "&pound;",         "&#163;",       "L",
  31.188 +                     "&quot;",          "&#34;",        "\"",   /* -- quotation mark = APL quote, */
  31.189 +                     "&OElig;",         "&#338;",       "OE",  /* -- latin capital ligature OE, */
  31.190 +                     "&oelig;",         "&#339;",       "oe",  /* -- latin small ligature oe, U+0153 ISOlat2 --> */
  31.191 +                     "&Scaron;",        "&#352;",       "S",  /* -- latin capital letter S with caron, */
  31.192 +                     "&scaron;",        "&#353;",       "s",  /* -- latin small letter s with caron, */
  31.193 +                     "&Yuml;",          "&#376;",       "Y",  /* -- latin capital letter Y with diaeresis, */
  31.194 +                     "&circ;",          "&#710;",       "",  /* -- modifier letter circumflex accent, */
  31.195 +                     "&tilde;",         "&#732;",       "~",  /* -- small tilde, U+02DC ISOdia --> */
  31.196 +                     "&ensp;",          "&#8194;",      " ", /* -- en space, U+2002 ISOpub --> */
  31.197 +                     "&emsp;",          "&#8195;",      " ", /* -- em space, U+2003 ISOpub --> */
  31.198 +                     "&thinsp;",        "&#8201;",      " ", /* -- thin space, U+2009 ISOpub --> */
  31.199 +                     "&ndash;",         "&#8211;",      "-", /* -- en dash, U+2013 ISOpub --> */
  31.200 +                     "&mdash;",         "&#8212;",      "--", /* -- em dash, U+2014 ISOpub --> */
  31.201 +                     "&lsquo;",         "&#8216;",      "'", /* -- left single quotation mark, */
  31.202 +                     "&rsquo;",         "&#8217;",      "'", /* -- right single quotation mark, */
  31.203 +                     "&sbquo;",         "&#8218;",      "'", /* -- single low-9 quotation mark, U+201A NEW --> */
  31.204 +                     "&ldquo;",         "&#8220;",      "\"", /* -- left double quotation mark, */
  31.205 +                     "&rdquo;",         "&#8221;",      "\"", /* -- right double quotation mark, */
  31.206 +                     "&bdquo;",         "&#8222;",      "\"", /* -- double low-9 quotation mark, U+201E NEW --> */
  31.207 +                     "&lsaquo;",        "&#8249;",      "\"", /* -- single left-pointing angle quotation mark, */
  31.208 +                     "&rsaquo;",        "&#8250;",      "\"", /* -- single right-pointing angle quotation mark, */
  31.209 +                     "&nbsp;",          "&#160;",       " ", /* -- no-break space = non-breaking space, */
  31.210 +                     "&iexcl;",         "&#161;",       "!", /* -- inverted exclamation mark, U+00A1 ISOnum --> */
  31.211 +                     "&cent;",          "&#162;",       "c", /* -- cent sign, U+00A2 ISOnum --> */
  31.212 +                     "&pound;",         "&#163;",       "L", /* -- pound sign, U+00A3 ISOnum --> */
  31.213 +                     "&curren;",        "&#164;",       "$", /* -- currency sign, U+00A4 ISOnum --> */
  31.214 +                     "&yen;",           "&#165;",       "Y", /* -- yen sign = yuan sign, U+00A5 ISOnum --> */
  31.215 +                     "&sect;",          "&#167;",       "--", /* -- section sign, U+00A7 ISOnum --> */
  31.216 +                     "&uml;",           "&#168;",       " ", /* -- diaeresis = spacing diaeresis, */
  31.217 +                     "&copy;",          "&#169;",       "(C) ", /* -- copyright sign, U+00A9 ISOnum --> */
  31.218 +                     "&ordf;",          "&#170;",       " ", /* -- feminine ordinal indicator, U+00AA ISOnum --> */
  31.219 +                     "&laquo;",         "&#171;",       "\"", /* -- left-pointing double angle quotation mark */
  31.220 +                     "&shy;",           "&#173;",       "-", /* -- soft hyphen = discretionary hyphen, */
  31.221 +                     "&reg;",           "&#174;",       "(R) ", /* -- registered sign = registered trade mark sign, */
  31.222 +                     "&macr;",          "&#175;",       " ", /* -- macron = spacing macron = overline */
  31.223 +                     "&deg;",           "&#176;",       " degrees", /* -- degree sign, U+00B0 ISOnum --> */
  31.224 +                     "&plusmn;",        "&#177;",       "+-", /* -- plus-minus sign = plus-or-minus sign, */
  31.225 +                     "&sup2;",          "&#178;",       "2", /* -- superscript two = superscript digit two */
  31.226 +                     "&sup3;",          "&#179;",       "3", /* -- superscript three = superscript digit three */
  31.227 +                     "&acute;",         "&#180;",       " ", /* -- acute accent = spacing acute, */
  31.228 +                     "&micro;",         "&#181;",       "m", /* -- micro sign, U+00B5 ISOnum --> */
  31.229 +                     "&para;",          "&#182;",       "--", /* -- pilcrow sign = paragraph sign, */
  31.230 +                     "&cedil;",         "&#184;",       " ", /* -- cedilla = spacing cedilla, U+00B8 ISOdia --> */
  31.231 +                     "&sup1;",          "&#185;",       "1", /* -- superscript one = superscript digit one, */
  31.232 +                     "&ordm;",          "&#186;",       " ", /* -- masculine ordinal indicator, */
  31.233 +                     "&raquo;",         "&#187;",       "\"", /* -- right-pointing double angle quotation mark */
  31.234 +                     "&frac14;",        "&#188;",       "1/4", /* -- vulgar fraction one quarter */
  31.235 +                     "&frac12;",        "&#189;",       "1/2", /* -- vulgar fraction one half */
  31.236 +                     "&frac34;",        "&#190;",       "3/4", /* -- vulgar fraction three quarters */
  31.237 +                     "&iquest;",        "&#191;",       "?", /* -- inverted question mark */
  31.238 +                     "&Agrave;",        "&#192;",       "A", /* -- latin capital letter A with grave */
  31.239 +                     "&Aacute;",        "&#193;",       "A", /* -- latin capital letter A with acute, */
  31.240 +                     "&Acirc;",         "&#194;",       "A", /* -- latin capital letter A with circumflex, */
  31.241 +                     "&Atilde;",        "&#195;",       "A", /* -- latin capital letter A with tilde, */
  31.242 +                     "&Auml;",          "&#196;",       "A", /* -- latin capital letter A with diaeresis, */
  31.243 +                     "&Aring;",         "&#197;",       "A", /* -- latin capital letter A with ring above */
  31.244 +                     "&AElig;",         "&#198;",       "AE", /* -- latin capital letter AE */
  31.245 +                     "&Ccedil;",        "&#199;",       "C", /* -- latin capital letter C with cedilla, */
  31.246 +                     "&Egrave;",        "&#200;",       "E", /* -- latin capital letter E with grave, */
  31.247 +                     "&Eacute;",        "&#201;",       "E", /* -- latin capital letter E with acute, */
  31.248 +                     "&Ecirc;",         "&#202;",       "E", /* -- latin capital letter E with circumflex, */
  31.249 +                     "&Euml;",          "&#203;",       "E", /* -- latin capital letter E with diaeresis, */
  31.250 +                     "&Igrave;",        "&#204;",       "I", /* -- latin capital letter I with grave, */
  31.251 +                     "&Iacute;",        "&#205;",       "I", /* -- latin capital letter I with acute, */
  31.252 +                     "&Icirc;",         "&#206;",       "I", /* -- latin capital letter I with circumflex, */
  31.253 +                     "&Iuml;",          "&#207;",       "I", /* -- latin capital letter I with diaeresis, */
  31.254 +                     "&ETH;",           "&#208;",       "E", /* -- latin capital letter ETH, U+00D0 ISOlat1 --> */
  31.255 +                     "&Ntilde;",        "&#209;",       "N", /* -- latin capital letter N with tilde, */
  31.256 +                     "&Ograve;",        "&#210;",       "O", /* -- latin capital letter O with grave, */
  31.257 +                     "&Oacute;",        "&#211;",       "O", /* -- latin capital letter O with acute, */
  31.258 +                     "&Ocirc;",         "&#212;",       "O", /* -- latin capital letter O with circumflex, */
  31.259 +                     "&Otilde;",        "&#213;",       "O", /* -- latin capital letter O with tilde, */
  31.260 +                     "&Ouml;",          "&#214;",       "O", /* -- latin capital letter O with diaeresis, */
  31.261 +                     "&times;",         "&#215;",       "*", /* -- multiplication sign, U+00D7 ISOnum --> */
  31.262 +                     "&Oslash;",        "&#216;",       "O", /* -- latin capital letter O with stroke */
  31.263 +                     "&Ugrave;",        "&#217;",       "U", /* -- latin capital letter U with grave, */
  31.264 +                     "&Uacute;",        "&#218;",       "U", /* -- latin capital letter U with acute, */
  31.265 +                     "&Ucirc;",         "&#219;",       "U", /* -- latin capital letter U with circumflex, */
  31.266 +                     "&Uuml;",          "&#220;",       "U", /* -- latin capital letter U with diaeresis, */
  31.267 +                     "&Yacute;",        "&#221;",       "Y", /* -- latin capital letter Y with acute, */
  31.268 +                     "&THORN;",         "&#222;",       "TH", /* -- latin capital letter THORN, */
  31.269 +                     "&szlig;",         "&#223;",       "sz", /* -- latin small letter sharp s = ess-zed, */
  31.270 +                     "&agrave;",        "&#224;",       "a", /* -- latin small letter a with grave */
  31.271 +                     "&aacute;",        "&#225;",       "a", /* -- latin small letter a with acute, */
  31.272 +                     "&acirc;",         "&#226;",       "a", /* -- latin small letter a with circumflex, */
  31.273 +                     "&atilde;",        "&#227;",       "a", /* -- latin small letter a with tilde, */
  31.274 +                     "&auml;",          "&#228;",       "a", /* -- latin small letter a with diaeresis, */
  31.275 +                     "&aring;",         "&#229;",       "a", /* -- latin small letter a with ring above */
  31.276 +                     "&aelig;",         "&#230;",       "ae", /* -- latin small letter ae */
  31.277 +                     "&ccedil;",        "&#231;",       "c", /* -- latin small letter c with cedilla, */
  31.278 +                     "&egrave;",        "&#232;",       "e", /* -- latin small letter e with grave, */
  31.279 +                     "&eacute;",        "&#233;",       "e", /* -- latin small letter e with acute, */
  31.280 +                     "&ecirc;",         "&#234;",       "e", /* -- latin small letter e with circumflex, */
  31.281 +                     "&euml;",          "&#235;",       "e", /* -- latin small letter e with diaeresis, */
  31.282 +                     "&igrave;",        "&#236;",       "i", /* -- latin small letter i with grave, */
  31.283 +                     "&iacute;",        "&#237;",       "i", /* -- latin small letter i with acute, */
  31.284 +                     "&icirc;",         "&#238;",       "i", /* -- latin small letter i with circumflex, */
  31.285 +                     "&iuml;",          "&#239;",       "i", /* -- latin small letter i with diaeresis, */
  31.286 +                     "&eth;",           "&#240;",       "eth", /* -- latin small letter eth, U+00F0 ISOlat1 --> */
  31.287 +                     "&ntilde;",        "&#241;",       "n", /* -- latin small letter n with tilde, */
  31.288 +                     "&ograve;",        "&#242;",       "o", /* -- latin small letter o with grave, */
  31.289 +                     "&oacute;",        "&#243;",       "o", /* -- latin small letter o with acute, */
  31.290 +                     "&ocirc;",         "&#244;",       "o", /* -- latin small letter o with circumflex, */
  31.291 +                     "&otilde;",        "&#245;",       "o", /* -- latin small letter o with tilde, */
  31.292 +                     "&ouml;",          "&#246;",       "o", /* -- latin small letter o with diaeresis, */
  31.293 +                     "&divide;",        "&#247;",       "/", /* -- division sign, U+00F7 ISOnum --> */
  31.294 +                     "&oslash;",        "&#248;",       "o", /* -- latin small letter o with stroke, */
  31.295 +                     "&ugrave;",        "&#249;",       "u", /* -- latin small letter u with grave, */
  31.296 +                     "&uacute;",        "&#250;",       "u", /* -- latin small letter u with acute, */
  31.297 +                     "&ucirc;",         "&#251;",       "u", /* -- latin small letter u with circumflex, */
  31.298 +                     "&uuml;",          "&#252;",       "u", /* -- latin small letter u with diaeresis, */
  31.299 +                     "&yacute;",        "&#253;",       "y", /* -- latin small letter y with acute, */
  31.300 +                     "&thorn;",         "&#254;",       "th", /* -- latin small letter thorn, */
  31.301 +                     "&yuml;",          "&#255;",       "y", /* -- latin small letter y with diaeresis, */
  31.302 +                      "", "" };
  31.303 +                    
  31.304 +/* ---- list of special characters (decimal ASCII codes) ---- */
  31.305 +#define CHAR_SPACE        32   /* space                      */
  31.306 +#define CHAR_TAB           9   /* horizontal tab             */
  31.307 +#define CHAR_LF           10   /* line feed                  */
  31.308 +#define CHAR_CR           13   /* carriage return            */
  31.309 +#define CHAR_DQUOTE       34   /* double quote               */
  31.310 +#define CHAR_SQUOTE       39   /* apostrophe / single quote  */
  31.311 +#define CHAR_OPEN_SQUOTE  96   /* backtick (grave accent)    */
  31.312 +#define CHAR_TILDE       126   /* tilde                      */
  31.313 +#define CHAR_ASTERISK     42   /* asterisk                   */
  31.314 +#define CHAR_FORESLASH    47   /* forward slash              */
  31.315 +#define CHAR_CARAT        94   /* caret (circumflex), '^'    */
  31.316 +/* the remaining characters are given as literals rather than codes */
  31.317 +#define CHAR_UNDERSCORE    '_'
  31.318 +#define CHAR_OPEN_CBRACK   '{'
  31.319 +#define CHAR_CLOSE_CBRACK  '}'
  31.320 +#define CHAR_OPEN_RBRACK   '('
  31.321 +#define CHAR_CLOSE_RBRACK  ')'
  31.322 +#define CHAR_OPEN_SBRACK   '['
  31.323 +#define CHAR_CLOSE_SBRACK  ']'
  31.324 +
  31.325 +
  31.326 +
  31.327 +
  31.328 +
  31.329 +/* ---- longest and shortest normal PG line lengths, in bytes as measured by strlen() ----*/
  31.330 +#define LONGEST_PG_LINE   75      /* first pass counts a line longer than this as long       */
  31.331 +#define WAY_TOO_LONG      80      /* first pass counts a line longer than this as very long  */
  31.332 +#define SHORTEST_PG_LINE  55      /* threshold used by the first-pass short-line heuristic   */
  31.333 +
  31.334 +#define SWITCHES "ESTPXLOYHWVMUD" /* switches:-                            */
  31.335 +                                  /*     D - ignore DP-specific markup     */
  31.336 +                                  /*     E - echo queried line; -E = off   */
  31.337 +                                  /*     S - check single quotes           */
  31.338 +                                  /*     T - common typos; -T toggles      */
  31.339 +                                  /*     P - require closure of quotes on  */
  31.340 +                                  /*         every paragraph               */
  31.341 +                                  /*     X - "Trust no one" :-) Paranoid!  */
  31.342 +                                  /*         Default on; -X turns it off   */
  31.343 +                                  /*     L - line end checking defaults on */
  31.344 +                                  /*         -L turns it off               */
  31.345 +                                  /*     O - overview. Just shows counts.  */
  31.346 +                                  /*     Y - puts errors to stdout         */
  31.347 +                                  /*         instead of stderr             */
  31.348 +                                  /*     H - Echoes header fields          */
  31.349 +                                  /*     M - Ignore markup in < >          */
  31.350 +                                  /*     U - Use file of User-defined Typos*/
  31.351 +                                  /*     W - Defaults for use on Web upload*/
  31.352 +                                  /*     V - Verbose - list EVERYTHING!    */
  31.353 +#define SWITNO 14                 /* max number of switch parms            */
  31.354 +                                  /*        - used for defining array-size */
  31.355 +#define MINARGS   1               /* minimum no of args excl switches      */
  31.356 +#define MAXARGS   1               /* maximum no of args excl switches      */
  31.357 +
  31.358 +int pswit[SWITNO];                /* program switches set by SWITCHES      */
  31.359 +/* index into pswit[] of each switch == position of its letter in SWITCHES */
  31.360 +#define ECHO_SWITCH      0        /* E */
  31.361 +#define SQUOTE_SWITCH    1        /* S */
  31.362 +#define TYPO_SWITCH      2        /* T */
  31.363 +#define QPARA_SWITCH     3        /* P */
  31.364 +#define PARANOID_SWITCH  4        /* X */
  31.365 +#define LINE_END_SWITCH  5        /* L */
  31.366 +#define OVERVIEW_SWITCH  6        /* O */
  31.367 +#define STDOUT_SWITCH    7        /* Y */
  31.368 +#define HEADER_SWITCH    8        /* H */
  31.369 +#define WEB_SWITCH       9        /* W */
  31.370 +#define VERBOSE_SWITCH   10       /* V */
  31.371 +#define MARKUP_SWITCH    11       /* M */
  31.372 +#define USERTYPO_SWITCH  12       /* U */
  31.373 +#define DP_SWITCH        13       /* D */
  31.374 +
  31.375 +
  31.376 +/* ---- query counters: main() zeroes the cnt_* values and prints them in overview mode ---- */
  31.377 +long cnt_dquot;       /* for overview mode, count of doublequote queries */
  31.378 +long cnt_squot;       /* for overview mode, count of singlequote queries */
  31.379 +long cnt_brack;       /* for overview mode, count of brackets queries */
  31.380 +long cnt_bin;         /* for overview mode, count of non-ASCII queries */
  31.381 +long cnt_odd;         /* for overview mode, count of odd character queries */
  31.382 +long cnt_long;        /* for overview mode, count of long line errors */
  31.383 +long cnt_short;       /* for overview mode, count of short line queries */
  31.384 +long cnt_punct;       /* for overview mode, count of punctuation and spacing queries */
  31.385 +long cnt_dash;        /* for overview mode, count of dash-related queries */
  31.386 +long cnt_word;        /* for overview mode, count of word queries */
  31.387 +long cnt_html;        /* for overview mode, count of html queries */
  31.388 +long cnt_lineend;     /* for overview mode, count of line-end queries */
  31.389 +long cnt_spacend;     /* count of lines with space at end  V .21; not added into TOTAL QUERIES */
  31.390 +long linecnt;         /* count of total lines in the file; reset at the start of procfile() */
  31.391 +long checked_linecnt; /* count of lines actually gutchecked V .26; reset at the start of procfile() */
  31.392 +
  31.393 +void proghelp(void);              /* called by main() when the argument count is wrong */
  31.394 +void procfile(char *);            /* process one file; argument is the file name       */
  31.395 +
  31.396 +#define LOW_THRESHOLD    0
  31.397 +#define HIGH_THRESHOLD   1
  31.398 +
  31.399 +#define START 0                   /* NOTE(review): the users of these 0/1 selector pairs */
  31.400 +#define END 1                     /* are outside this part of the file - confirm their   */
  31.401 +#define PREV 0                    /* meaning against the functions that take them        */
  31.402 +#define NEXT 1
  31.403 +#define FIRST_OF_PAIR 0
  31.404 +#define SECOND_OF_PAIR 1
  31.405 +
  31.406 +#define MAX_WORDPAIR 1000
  31.407 +
  31.408 +char running_from[MAX_PATH];      /* main() copies argv[0] here, then trims it after the last '/' or '\' */
  31.409 +
  31.410 +int mixdigit(char *);
  31.411 +char *getaword(char *, char *);
  31.412 +int matchword(char *, char *);
  31.413 +char *flgets(char *, int, FILE *, long);
  31.414 +void lowerit(char *);
  31.415 +int gcisalpha(unsigned char);
  31.416 +int gcisdigit(unsigned char);
  31.417 +int gcisletter(unsigned char);
  31.418 +char *gcstrchr(char *s, char c);
  31.419 +void postprocess_for_HTML(char *);
  31.420 +char *linehasmarkup(char *);
  31.421 +char *losemarkup(char *);
  31.422 +int tagcomp(char *, char *);
  31.423 +char *loseentities(char *);
  31.424 +int isroman(char *);
  31.425 +int usertypo_count;               /* number of entries main() has stored in usertypo[] */
  31.426 +void postprocess_for_DP(char *);
  31.427 +
  31.428 +char wrk[LINEBUFSIZE];
  31.429 +
  31.430 +/* This is disgustingly lazy, predefining max words & lengths,   */
  31.431 +/* but now I'm out of 16-bit restrictions, what's a couple of K? */
  31.432 +#define MAX_QWORD           50    /* number of rows in qword[], qperiod[] and dupcnt[] */
  31.433 +#define MAX_QWORD_LENGTH    40    /* bytes per row of qword[] and qperiod[]            */
  31.434 +char qword[MAX_QWORD][MAX_QWORD_LENGTH];
  31.435 +char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
  31.436 +signed int dupcnt[MAX_QWORD];
  31.437 +
  31.438 +
  31.439 +/* main - read the switches, load the optional user typo list, run procfile() */
  31.440 +/*        on the single file argument; returns 1 on bad usage, else 0.        */
  31.441 +int main(int argc, char **argv)
  31.442 +{
  31.443 +    char *argsw, *s;
  31.444 +    int i, switno, invarg;
  31.445 +    char usertypo_file[MAX_PATH];
  31.446 +    FILE *usertypofile;
  31.447 +
  31.448 +    /* running_from is left empty if argv[0] would not fit in it */
  31.449 +    if (strlen(argv[0]) < sizeof(running_from))
  31.450 +        strcpy(running_from, argv[0]);  /* save the path to the executable gutcheck */
  31.451 +    /* find out what directory we're running from: drop everything after the    */
  31.452 +    /* last path separator; the bound is tested before each byte is looked at   */
  31.453 +    for (s = running_from + strlen(running_from); s > running_from && s[-1] != '/' && s[-1] != '\\'; s--)
  31.454 +        s[-1] = 0;
  31.455 +
  31.456 +
  31.457 +    switno = strlen(SWITCHES);
  31.458 +    for (i = switno ; --i >= 0 ; )
  31.459 +        pswit[i] = 0;           /* initialise switches */
  31.460 +
  31.461 +    /* Standard loop to extract switches.                   */
  31.462 +    /* When we come out of this loop, the arguments will be */
  31.463 +    /* in argv[0] upwards and the switches used will be     */
  31.464 +    /* represented by their equivalent elements in pswit[]  */
  31.465 +    while ( --argc > 0 && **++argv == '-')
  31.466 +        for (argsw = argv[0]+1; *argsw !='\0'; argsw++)
  31.467 +            for (i = switno, invarg = 1; (--i >= 0) && invarg == 1 ; )
  31.468 +                if (toupper((unsigned char)*argsw) == SWITCHES[i] ) {
  31.469 +                    invarg = 0;
  31.470 +                    pswit[i] = 1;
  31.471 +                    }
  31.472 +    /* letters that are not in SWITCHES are silently ignored */
  31.473 +    pswit[PARANOID_SWITCH] ^= 1;         /* Paranoid checking is turned OFF, not on, by its switch */
  31.474 +
  31.475 +    if (pswit[PARANOID_SWITCH]) {                         /* if running in paranoid mode */
  31.476 +        pswit[TYPO_SWITCH] = pswit[TYPO_SWITCH] ^ 1;      /* typo checks are on unless -T was given */
  31.477 +        }                                                 /* v.20 removed s and p switches from paranoid mode */
  31.478 +
  31.479 +    pswit[LINE_END_SWITCH] ^= 1;         /* Line-end checking is turned OFF, not on, by its switch */
  31.480 +    pswit[ECHO_SWITCH] ^= 1;             /* V.21 Echoing is turned OFF, not on, by its switch      */
  31.481 +
  31.482 +    if (pswit[OVERVIEW_SWITCH])       /* just print summary; don't echo */
  31.483 +        pswit[ECHO_SWITCH] = 0;
  31.484 +
  31.485 +    /* Web uploads - for the moment, this is really just a placeholder     */
  31.486 +    /* until we decide what processing we really want to do on web uploads */
  31.487 +    if (pswit[WEB_SWITCH]) {          /* specific override for web uploads */
  31.488 +        pswit[ECHO_SWITCH] =     1;
  31.489 +        pswit[SQUOTE_SWITCH] =   0;
  31.490 +        pswit[TYPO_SWITCH] =     1;
  31.491 +        pswit[QPARA_SWITCH] =    0;
  31.492 +        pswit[PARANOID_SWITCH] = 1;
  31.493 +        pswit[LINE_END_SWITCH] = 0;
  31.494 +        pswit[OVERVIEW_SWITCH] = 0;
  31.495 +        pswit[STDOUT_SWITCH] =   0;
  31.496 +        pswit[HEADER_SWITCH] =   1;
  31.497 +        pswit[VERBOSE_SWITCH] =  0;
  31.498 +        pswit[MARKUP_SWITCH] =   0;
  31.499 +        pswit[USERTYPO_SWITCH] = 0;
  31.500 +        pswit[DP_SWITCH] = 0;
  31.501 +        }
  31.502 +
  31.503 +
  31.504 +    if (argc < MINARGS || argc > MAXARGS) {  /* check number of args */
  31.505 +        proghelp();
  31.506 +        return(1);            /* exit */
  31.507 +        }
  31.508 +
  31.509 +
  31.510 +    /* read in the user-defined stealth scanno list */
  31.511 +
  31.512 +    if (pswit[USERTYPO_SWITCH]) {                    /* ... we were told we had one! */
  31.513 +        if ((usertypofile = fopen(USERTYPO_FILE, "rb")) == NULL) {   /* not in cwd. try gutcheck directory. */
  31.514 +            strcpy(usertypo_file, running_from);     /* both buffers are MAX_PATH bytes, so this fits */
  31.515 +            strncat(usertypo_file, USERTYPO_FILE, sizeof(usertypo_file) - strlen(usertypo_file) - 1);
  31.516 +            if ((usertypofile = fopen(usertypo_file, "rb")) == NULL) {  /* we ain't got no user typo file! */
  31.517 +                printf("   --> I couldn't find gutcheck.typ -- proceeding without user typos.\n");
  31.518 +                }
  31.519 +            }
  31.520 +
  31.521 +        usertypo_count = 0;
  31.522 +        if (usertypofile) {  /* we managed to open a User Typo File! */
  31.523 +            if (pswit[USERTYPO_SWITCH]) {
  31.524 +                while (flgets(aline, LINEBUFSIZE-1, usertypofile, (long)usertypo_count)) {
  31.525 +                    if (strlen(aline) > 1) {
  31.526 +                        if ((int)*aline > 33) {
  31.527 +                            s = malloc(strlen(aline)+1);
  31.528 +                            if (!s) {
  31.529 +                                fprintf(stderr, "gutcheck: cannot get enough memory for user typo file!!\n");
  31.530 +                                exit(1);
  31.531 +                                }
  31.532 +                            strcpy(s, aline);
  31.533 +                            usertypo[usertypo_count] = s;
  31.534 +                            usertypo_count++;
  31.535 +                            if (usertypo_count >= MAX_USER_TYPOS) {
  31.536 +                                printf("   --> Only %d user-defined typos allowed: ignoring the rest\n", (int)MAX_USER_TYPOS);
  31.537 +                                break;
  31.538 +                                }
  31.539 +                            }
  31.540 +                        }
  31.541 +                    }
  31.542 +                }
  31.543 +            fclose(usertypofile);
  31.544 +            }
  31.545 +        }
  31.546 +
  31.547 +
  31.548 +
  31.549 +
  31.550 +    fprintf(stderr, "gutcheck: Check and report on an e-text\n");
  31.551 +
  31.552 +    cnt_dquot = cnt_squot = cnt_brack = cnt_bin = cnt_odd = cnt_long =
  31.553 +    cnt_short = cnt_punct = cnt_dash = cnt_word = cnt_html = cnt_lineend =
  31.554 +    cnt_spacend = 0;
  31.555 +
  31.556 +    procfile(argv[0]);
  31.557 +
  31.558 +    if (pswit[OVERVIEW_SWITCH]) {
  31.559 +                         printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
  31.560 +                            checked_linecnt, linecnt, linecnt - checked_linecnt);
  31.561 +                         printf("    --------------- Queries found --------------\n");
  31.562 +        if (cnt_long)    printf("    Long lines:                             %5ld\n",cnt_long);
  31.563 +        if (cnt_short)   printf("    Short lines:                            %5ld\n",cnt_short);
  31.564 +        if (cnt_lineend) printf("    Line-end problems:                      %5ld\n",cnt_lineend);
  31.565 +        if (cnt_word)    printf("    Common typos:                           %5ld\n",cnt_word);
  31.566 +        if (cnt_dquot)   printf("    Unmatched quotes:                       %5ld\n",cnt_dquot);
  31.567 +        if (cnt_squot)   printf("    Unmatched SingleQuotes:                 %5ld\n",cnt_squot);
  31.568 +        if (cnt_brack)   printf("    Unmatched brackets:                     %5ld\n",cnt_brack);
  31.569 +        if (cnt_bin)     printf("    Non-ASCII characters:                   %5ld\n",cnt_bin);
  31.570 +        if (cnt_odd)     printf("    Proofing characters:                    %5ld\n",cnt_odd);
  31.571 +        if (cnt_punct)   printf("    Punctuation & spacing queries:          %5ld\n",cnt_punct);
  31.572 +        if (cnt_dash)    printf("    Non-standard dashes:                    %5ld\n",cnt_dash);
  31.573 +        if (cnt_html)    printf("    Possible HTML tags:                     %5ld\n",cnt_html);
  31.574 +        printf("\n");
  31.575 +        printf("    TOTAL QUERIES                           %5ld\n",
  31.576 +            cnt_dquot + cnt_squot + cnt_brack + cnt_bin + cnt_odd + cnt_long +
  31.577 +            cnt_short + cnt_punct + cnt_dash + cnt_word + cnt_html + cnt_lineend);
  31.578 +        }
  31.579 +
  31.580 +    return(0);
  31.581 +}
  31.582 +
  31.583 +
  31.584 +
  31.585 +/* procfile - process one file */
  31.586 +
  31.587 +void procfile(char *filename)
  31.588 +{
  31.589 +
  31.590 +    char *s, *t, *s1, laststart, *wordstart;
  31.591 +    char inword[MAXWORDLEN], testword[MAXWORDLEN];
  31.592 +    char parastart[81];     /* first line of current para */
  31.593 +    FILE *infile;
  31.594 +    long quot, squot, firstline, alphalen, totlen, binlen,
  31.595 +         shortline, longline, verylongline, spacedash, emdash,
  31.596 +         space_emdash, non_PG_space_emdash, PG_space_emdash,
  31.597 +         footerline, dotcomma, start_para_line, astline, fslashline,
  31.598 +         standalone_digit, hyphens, htmcount, endquote_count;
  31.599 +    long spline, nspline;
  31.600 +    signed int i, j, llen, isemptyline, isacro, isellipsis, istypo, alower,
  31.601 +         eNon_A, eTab, eTilde, eAst, eFSlash, eCarat;
  31.602 +    signed int warn_short, warn_long, warn_bin, warn_dash, warn_dotcomma,
  31.603 +         warn_ast, warn_fslash, warn_digit, warn_hyphen, warn_endquote;
  31.604 +    unsigned int lastlen, lastblen;
  31.605 +    signed int s_brack, c_brack, r_brack, c_unders;
  31.606 +    signed int open_single_quote, close_single_quote, guessquote, dquotepar, squotepar;
  31.607 +    signed int isnewpara, vowel, consonant;
  31.608 +    char dquote_err[80], squote_err[80], rbrack_err[80], sbrack_err[80], cbrack_err[80],
  31.609 +         unders_err[80];
  31.610 +    signed int qword_index, qperiod_index, isdup;
  31.611 +    signed int enddash;
  31.612 +    signed int Dutchcount, isDutch, Frenchcount, isFrench;
  31.613 +
  31.614 +
  31.615 +    
  31.616 +
  31.617 +
  31.618 +    laststart = CHAR_SPACE;
  31.619 +    lastlen = lastblen = 0;
  31.620 +    *dquote_err = *squote_err = *rbrack_err = *cbrack_err = *sbrack_err =
  31.621 +        *unders_err = *prevline = 0;
  31.622 +    linecnt = firstline = alphalen = totlen = binlen =
  31.623 +        shortline = longline = spacedash = emdash = checked_linecnt =
  31.624 +        space_emdash = non_PG_space_emdash = PG_space_emdash =
  31.625 +        footerline = dotcomma = start_para_line = astline = fslashline = 
  31.626 +        standalone_digit = hyphens = htmcount = endquote_count = 0;
  31.627 +    quot = squot = s_brack = c_brack = r_brack = c_unders = 0;
  31.628 +    i = llen = isemptyline = isacro = isellipsis = istypo = 0;
  31.629 +    warn_short = warn_long = warn_bin = warn_dash = warn_dotcomma = 
  31.630 +        warn_ast = warn_fslash = warn_digit = warn_endquote = 0;
  31.631 +    isnewpara = vowel = consonant = enddash = 0;
  31.632 +    spline = nspline = 0;
  31.633 +    qword_index = qperiod_index = isdup = 0;
  31.634 +    *inword = *testword = 0;
  31.635 +    open_single_quote = close_single_quote = guessquote = dquotepar = squotepar = 0;
  31.636 +    Dutchcount = isDutch = Frenchcount = isFrench = 0;
  31.637 +
  31.638 +
  31.639 +    for (j = 0; j < MAX_QWORD; j++) {
  31.640 +        dupcnt[j] = 0;
  31.641 +        for (i = 0; i < MAX_QWORD_LENGTH; i++)
  31.642 +            qword[i][j] = 0;
  31.643 +            qperiod[i][j] = 0;
  31.644 +            }
  31.645 +
  31.646 +
  31.647 +    if ((infile = fopen(filename, "rb")) == NULL) {
  31.648 +        if (pswit[STDOUT_SWITCH])
  31.649 +            fprintf(stdout, "gutcheck: cannot open %s\n", filename);
  31.650 +        else
  31.651 +            fprintf(stderr, "gutcheck: cannot open %s\n", filename);
  31.652 +        exit(1);
  31.653 +        }
  31.654 +
  31.655 +    fprintf(stdout, "\n\nFile: %s\n\n", filename);
  31.656 +    firstline = shortline = longline = verylongline = 0;
  31.657 +
  31.658 +
  31.659 +    /*****************************************************/
  31.660 +    /*                                                   */
  31.661 +    /*  Run a first pass - verify that it's a valid PG   */
  31.662 +    /*  file, decide whether to report some things that  */
  31.663 +    /*  occur many times in the text like long or short  */
  31.664 +    /*  lines, non-standard dashes, and other good stuff */
  31.665 +    /*  I'll doubtless think of later.                   */
  31.666 +    /*                                                   */
  31.667 +    /*****************************************************/
  31.668 +
  31.669 +    /*****************************************************/
  31.670 +    /* V.24  Sigh. Yet Another Header Change             */
  31.671 +    /*****************************************************/
  31.672 +
  31.673 +    while (fgets(aline, LINEBUFSIZE-1, infile)) {
  31.674 +        while (aline[strlen(aline)-1] == 10 || aline[strlen(aline)-1] == 13 ) aline[strlen(aline)-1] = 0;
  31.675 +        linecnt++;
  31.676 +        if (strstr(aline, "*END") && strstr(aline, "SMALL PRINT") && (strstr(aline, "PUBLIC DOMAIN") || strstr(aline, "COPYRIGHT"))) {
  31.677 +            if (spline)
  31.678 +                printf("   --> Duplicate header?\n");
  31.679 +            spline = linecnt + 1;   /* first line of non-header text, that is */
  31.680 +            }
  31.681 +        if (!strncmp(aline, "*** START", 9) && strstr(aline, "PROJECT GUTENBERG")) {
  31.682 +            if (nspline)
  31.683 +                printf("   --> Duplicate header?\n");
  31.684 +            nspline = linecnt + 1;   /* first line of non-header text, that is */
  31.685 +            }
  31.686 +        if (spline || nspline) {
  31.687 +            lowerit(aline);
  31.688 +            if (strstr(aline, "end") && strstr(aline, "project gutenberg")) {
  31.689 +                if (strstr(aline, "end") < strstr(aline, "project gutenberg")) {
  31.690 +                    if (footerline) {
  31.691 +                        if (!nspline) /* it's an old-form header - we can detect duplicates */
  31.692 +                            printf("   --> Duplicate footer?\n");
  31.693 +                        else 
  31.694 +                            ;
  31.695 +                        }
  31.696 +                    else {
  31.697 +                        footerline = linecnt;
  31.698 +                        }
  31.699 +                    }
  31.700 +                }
  31.701 +            }
  31.702 +        if (spline) firstline = spline;
  31.703 +        if (nspline) firstline = nspline;  /* override with new */
  31.704 +
  31.705 +        if (footerline) continue;    /* 0.99+ don't count the boilerplate in the footer */
  31.706 +
  31.707 +        llen = strlen(aline);
  31.708 +        totlen += llen;
  31.709 +        for (i = 0; i < llen; i++) {
  31.710 +            if ((unsigned char)aline[i] > 127) binlen++;
  31.711 +            if (gcisalpha(aline[i])) alphalen++;
  31.712 +            if (i > 0)
  31.713 +                if (aline[i] == CHAR_DQUOTE && isalpha(aline[i-1]))
  31.714 +                    endquote_count++;
  31.715 +            }
  31.716 +        if (strlen(aline) > 2
  31.717 +            && lastlen > 2 && lastlen < SHORTEST_PG_LINE
  31.718 +            && lastblen > 2 && lastblen > SHORTEST_PG_LINE
  31.719 +            && laststart != CHAR_SPACE)
  31.720 +                shortline++;
  31.721 +
  31.722 +        if (*aline) /* fixed line below for 0.96 */
  31.723 +            if ((unsigned char)aline[strlen(aline)-1] <= CHAR_SPACE) cnt_spacend++;
  31.724 +
  31.725 +        if (strstr(aline, ".,")) dotcomma++;
  31.726 +        /* 0.98 only count ast lines for ignoring purposes where there is */
  31.727 +        /* locase text on the line */
  31.728 +        if (strstr(aline, "*")) {
  31.729 +            for (s = aline; *s; s++)
  31.730 +                if (*s >='a' && *s <= 'z')
  31.731 +                    break;
  31.732 +             if (*s) astline++;
  31.733 +             }
  31.734 +        if (strstr(aline, "/"))
  31.735 +            fslashline++;
  31.736 +        for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
  31.737 +        if (aline[i] == '-' && aline[i-1] != '-') hyphens++;
  31.738 +
  31.739 +        if (llen > LONGEST_PG_LINE) longline++;
  31.740 +        if (llen > WAY_TOO_LONG) verylongline++;
  31.741 +
  31.742 +        if (strstr(aline, "<") && strstr(aline, ">")) {
  31.743 +            i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
  31.744 +            if (i > 0) 
  31.745 +                htmcount++;
  31.746 +            if (strstr(aline, "<i>")) htmcount +=4; /* bonus marks! */
  31.747 +            }
  31.748 +
  31.749 +        /* Check for spaced em-dashes */
  31.750 +        if (strstr(aline,"--")) {
  31.751 +            emdash++;
  31.752 +            if (*(strstr(aline, "--")-1) == CHAR_SPACE ||
  31.753 +               (*(strstr(aline, "--")+2) == CHAR_SPACE))
  31.754 +                    space_emdash++;
  31.755 +            if (*(strstr(aline, "--")-1) == CHAR_SPACE &&
  31.756 +               (*(strstr(aline, "--")+2) == CHAR_SPACE))
  31.757 +                    non_PG_space_emdash++;             /* count of em-dashes with spaces both sides */
  31.758 +            if (*(strstr(aline, "--")-1) != CHAR_SPACE &&
  31.759 +               (*(strstr(aline, "--")+2) != CHAR_SPACE))
  31.760 +                    PG_space_emdash++;                 /* count of PG-type em-dashes with no spaces */
  31.761 +            }
  31.762 +
  31.763 +        for (s = aline; *s;) {
  31.764 +            s = getaword(s, inword);
  31.765 +            if (!strcmp(inword, "hij") || !strcmp(inword, "niet")) 
  31.766 +                Dutchcount++;
  31.767 +            if (!strcmp(inword, "dans") || !strcmp(inword, "avec")) 
  31.768 +                Frenchcount++;
  31.769 +            if (!strcmp(inword, "0") || !strcmp(inword, "1")) 
  31.770 +                standalone_digit++;
  31.771 +            }
  31.772 +
  31.773 +        /* Check for spaced dashes */
  31.774 +        if (strstr(aline," -"))
  31.775 +            if (*(strstr(aline, " -")+2) != '-')
  31.776 +                    spacedash++;
  31.777 +        lastblen = lastlen;
  31.778 +        lastlen = strlen(aline);
  31.779 +        laststart = aline[0];
  31.780 +
  31.781 +        }
  31.782 +    fclose(infile);
  31.783 +
  31.784 +
  31.785 +    /* now, based on this quick view, make some snap decisions */
  31.786 +    if (cnt_spacend > 0) {
  31.787 +        printf("   --> %ld lines in this file have white space at end\n", cnt_spacend);
  31.788 +        }
  31.789 +
  31.790 +    warn_dotcomma = 1;
  31.791 +    if (dotcomma > 5) {
  31.792 +        warn_dotcomma = 0;
  31.793 +        printf("   --> %ld lines in this file contain '.,'. Not reporting them.\n", dotcomma);
  31.794 +        }
  31.795 +
  31.796 +    /* if more than 50 lines, or one-tenth, are short, don't bother reporting them */
  31.797 +    warn_short = 1;
  31.798 +    if (shortline > 50 || shortline * 10 > linecnt) {
  31.799 +        warn_short = 0;
  31.800 +        printf("   --> %ld lines in this file are short. Not reporting short lines.\n", shortline);
  31.801 +        }
  31.802 +
  31.803 +    /* if more than 50 lines, or one-tenth, are long, don't bother reporting them */
  31.804 +    warn_long = 1;
  31.805 +    if (longline > 50 || longline * 10 > linecnt) {
  31.806 +        warn_long = 0;
  31.807 +        printf("   --> %ld lines in this file are long. Not reporting long lines.\n", longline);
  31.808 +        }
  31.809 +
  31.810 +    /* if more than 10 lines contain asterisks, don't bother reporting them V.0.97 */
  31.811 +    warn_ast = 1;
  31.812 +    if (astline > 10 ) {
  31.813 +        warn_ast = 0;
  31.814 +        printf("   --> %ld lines in this file contain asterisks. Not reporting them.\n", astline);
  31.815 +        }
  31.816 +
  31.817 +    /* if more than 10 lines contain forward slashes, don't bother reporting them V.0.99 */
  31.818 +    warn_fslash = 1;
  31.819 +    if (fslashline > 10 ) {
  31.820 +        warn_fslash = 0;
  31.821 +        printf("   --> %ld lines in this file contain forward slashes. Not reporting them.\n", fslashline);
  31.822 +        }
  31.823 +
  31.824 +    /* if more than 20 lines contain unpunctuated endquotes, don't bother reporting them V.0.99 */
  31.825 +    warn_endquote = 1;
  31.826 +    if (endquote_count > 20 ) {
  31.827 +        warn_endquote = 0;
  31.828 +        printf("   --> %ld lines in this file contain unpunctuated endquotes. Not reporting them.\n", endquote_count);
  31.829 +        }
  31.830 +
  31.831 +    /* if more than 10 lines contain standalone digits, don't bother reporting them V.0.97 */
  31.832 +    warn_digit = 1;
  31.833 +    if (standalone_digit > 10 ) {
  31.834 +        warn_digit = 0;
  31.835 +        printf("   --> %ld lines in this file contain standalone 0s and 1s. Not reporting them.\n", standalone_digit);
  31.836 +        }
  31.837 +
  31.838 +    /* if more than 20 lines contain hyphens at end, don't bother reporting them V.0.98 */
  31.839 +    warn_hyphen = 1;
  31.840 +    if (hyphens > 20 ) {
  31.841 +        warn_hyphen = 0;
  31.842 +        printf("   --> %ld lines in this file have hyphens at end. Not reporting them.\n", hyphens);
  31.843 +        }
  31.844 +
  31.845 +    if (htmcount > 20 && !pswit[MARKUP_SWITCH]) {
  31.846 +        printf("   --> Looks like this is HTML. Switching HTML mode ON.\n");
  31.847 +        pswit[MARKUP_SWITCH] = 1;
  31.848 +        }
  31.849 +        
  31.850 +    if (verylongline > 0) {
  31.851 +        printf("   --> %ld lines in this file are VERY long!\n", verylongline);
  31.852 +        }
  31.853 +
  31.854 +    /* If there are more non-PG spaced dashes than PG em-dashes,    */
  31.855 +    /* assume it's deliberate                                       */
  31.856 +    /* Current PG guidelines say don't use them, but older texts do,*/
  31.857 +    /* and some people insist on them whatever the guidelines say.  */
  31.858 +    /* V.20 removed requirement that PG_space_emdash be greater than*/
  31.859 +    /* ten before turning off warnings about spaced dashes.         */
  31.860 +    warn_dash = 1;
  31.861 +    if (spacedash + non_PG_space_emdash > PG_space_emdash) {
  31.862 +        warn_dash = 0;
  31.863 +        printf("   --> There are %ld spaced dashes and em-dashes. Not reporting them.\n", spacedash + non_PG_space_emdash);
  31.864 +        }
  31.865 +
  31.866 +    /* if more than a quarter of characters are hi-bit, bug out */
  31.867 +    warn_bin = 1;
  31.868 +    if (binlen * 4 > totlen) {
  31.869 +        printf("   --> This file does not appear to be ASCII. Terminating. Best of luck with it!\n");
  31.870 +        exit(1);
  31.871 +        }
  31.872 +    if (alphalen * 4 < totlen) {
  31.873 +        printf("   --> This file does not appear to be text. Terminating. Best of luck with it!\n");
  31.874 +        exit(1);
  31.875 +        }
  31.876 +    if ((binlen * 100 > totlen) || (binlen > 100)) {
  31.877 +        printf("   --> There are a lot of foreign letters here. Not reporting them.\n");
  31.878 +        warn_bin = 0;
  31.879 +        }
  31.880 +
  31.881 +    /* isDutch and isFrench added .991 Feb 06 for Frank, Jeroen, Renald */
  31.882 +    isDutch = 0;
  31.883 +    if (Dutchcount > 50) {
  31.884 +        isDutch = 1;
  31.885 +        printf("   --> This looks like Dutch - switching off dashes and warnings for 's Middags case.\n");
  31.886 +        }
  31.887 +
  31.888 +    isFrench = 0;
  31.889 +    if (Frenchcount > 50) {
  31.890 +        isFrench = 1;
  31.891 +        printf("   --> This looks like French - switching off some doublepunct.\n");
  31.892 +        }
  31.893 +
  31.894 +    if (firstline && footerline)
  31.895 +        printf("    The PG header and footer appear to be already on.\n");
  31.896 +    else {
  31.897 +        if (firstline)
  31.898 +            printf("    The PG header is on - no footer.\n");
  31.899 +        if (footerline)
  31.900 +            printf("    The PG footer is on - no header.\n");
  31.901 +        }
  31.902 +    printf("\n");
  31.903 +
  31.904 +    /* V.22 George Davis asked for an override switch to force it to list everything */
  31.905 +    if (pswit[VERBOSE_SWITCH]) {
  31.906 +        warn_bin = 1;
  31.907 +        warn_short = 1;
  31.908 +        warn_dotcomma = 1;
  31.909 +        warn_long = 1;
  31.910 +        warn_dash = 1;
  31.911 +        warn_digit = 1;
  31.912 +        warn_ast = 1;
  31.913 +        warn_fslash = 1;
  31.914 +        warn_hyphen = 1;
  31.915 +        warn_endquote = 1;
  31.916 +        printf("   *** Verbose output is ON -- you asked for it! ***\n");
  31.917 +        }
  31.918 +
  31.919 +    if (isDutch)
  31.920 +        warn_dash = 0;  /* Frank suggested turning it REALLY off for Dutch */
  31.921 +
  31.922 +    if ((infile = fopen(filename, "rb")) == NULL) {
  31.923 +        if (pswit[STDOUT_SWITCH])
  31.924 +            fprintf(stdout, "gutcheck: cannot open %s\n", filename);
  31.925 +        else
  31.926 +            fprintf(stderr, "gutcheck: cannot open %s\n", filename);
  31.927 +        exit(1);
  31.928 +        }
  31.929 +
  31.930 +    if (footerline > 0 && firstline > 0 && footerline > firstline && footerline - firstline < 100) { /* ugh */
  31.931 +        printf("   --> I don't really know where this text starts. \n");
  31.932 +        printf("       There are no reference points.\n");
  31.933 +        printf("       I'm going to have to report the header and footer as well.\n");
  31.934 +        firstline=0;
  31.935 +        }
  31.936 +        
  31.937 +
  31.938 +
  31.939 +    /*****************************************************/
  31.940 +    /*                                                   */
  31.941 +    /* Here we go with the main pass. Hold onto yer hat! */
  31.942 +    /*                                                   */
  31.943 +    /*****************************************************/
  31.944 +
  31.945 +    /* Re-init some variables we've dirtied */
  31.946 +    quot = squot = linecnt = 0;
  31.947 +    laststart = CHAR_SPACE;
  31.948 +    lastlen = lastblen = 0;
  31.949 +
  31.950 +    while (flgets(aline, LINEBUFSIZE-1, infile, linecnt+1)) {
  31.951 +        linecnt++;
  31.952 +        if (linecnt == 1) isnewpara = 1;
  31.953 +        if (pswit[DP_SWITCH])
  31.954 +            if (!strncmp(aline, "-----File: ", 11))
  31.955 +                continue;    // skip DP page separators completely
  31.956 +        if (linecnt < firstline || (footerline > 0 && linecnt > footerline)) {
  31.957 +            if (pswit[HEADER_SWITCH]) {
  31.958 +                if (!strncmp(aline, "Title:", 6))
  31.959 +                    printf("    %s\n", aline);
  31.960 +                if (!strncmp (aline, "Author:", 7))
  31.961 +                    printf("    %s\n", aline);
  31.962 +                if (!strncmp(aline, "Release Date:", 13))
  31.963 +                    printf("    %s\n", aline);
  31.964 +                if (!strncmp(aline, "Edition:", 8))
  31.965 +                    printf("    %s\n\n", aline);
  31.966 +                }
  31.967 +            continue;                /* skip through the header */
  31.968 +            }
  31.969 +        checked_linecnt++;
  31.970 +        s = aline;
  31.971 +        isemptyline = 1;      /* assume the line is empty until proven otherwise */
  31.972 +
  31.973 +        /* If we are in a state of unbalanced quotes, and this line    */
  31.974 +        /* doesn't begin with a quote, output the stored error message */
  31.975 +        /* If the -P switch was used, print the warning even if the    */
  31.976 +        /* new para starts with quotes                                 */
  31.977 +        /* Version .20 - if the new paragraph does start with a quote, */
  31.978 +        /* but is indented, I was giving a spurious error. Need to     */
  31.979 +        /* check the first _non-space_ character on the line rather    */
  31.980 +        /* than the first character when deciding whether the para     */
  31.981 +        /* starts with a quote. Using *t for this.                     */
  31.982 +        t = s;
  31.983 +        while (*t == ' ') t++;
  31.984 +        if (*dquote_err)
  31.985 +            if (*t != CHAR_DQUOTE || pswit[QPARA_SWITCH]) {
  31.986 +                if (!pswit[OVERVIEW_SWITCH]) {
  31.987 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
  31.988 +                    printf(dquote_err);
  31.989 +                    }
  31.990 +                else
  31.991 +                    cnt_dquot++;
  31.992 +            }
  31.993 +        if (*squote_err) {
  31.994 +            if (*t != CHAR_SQUOTE && *t != CHAR_OPEN_SQUOTE || pswit[QPARA_SWITCH] || squot) {
  31.995 +                if (!pswit[OVERVIEW_SWITCH]) {
  31.996 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
  31.997 +                    printf(squote_err);
  31.998 +                    }
  31.999 +                else
 31.1000 +                    cnt_squot++;
 31.1001 +                }
 31.1002 +            squot = 0;
 31.1003 +            }
 31.1004 +        if (*rbrack_err) {
 31.1005 +            if (!pswit[OVERVIEW_SWITCH]) {
 31.1006 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 31.1007 +                printf(rbrack_err);
 31.1008 +                }
 31.1009 +            else
 31.1010 +                cnt_brack++;
 31.1011 +            }
 31.1012 +        if (*sbrack_err) {
 31.1013 +            if (!pswit[OVERVIEW_SWITCH]) {
 31.1014 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 31.1015 +                printf(sbrack_err);
 31.1016 +                }
 31.1017 +            else
 31.1018 +                cnt_brack++;
 31.1019 +            }
 31.1020 +        if (*cbrack_err) {
 31.1021 +            if (!pswit[OVERVIEW_SWITCH]) {
 31.1022 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 31.1023 +                printf(cbrack_err);
 31.1024 +                }
 31.1025 +            else
 31.1026 +                cnt_brack++;
 31.1027 +            }
 31.1028 +        if (*unders_err) {
 31.1029 +            if (!pswit[OVERVIEW_SWITCH]) {
 31.1030 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", parastart);
 31.1031 +                printf(unders_err);
 31.1032 +                }
 31.1033 +            else
 31.1034 +                cnt_brack++;
 31.1035 +            }
 31.1036 +
 31.1037 +        *dquote_err = *squote_err = *rbrack_err = *cbrack_err = 
 31.1038 +            *sbrack_err = *unders_err = 0;
 31.1039 +
 31.1040 +
 31.1041 +        /* look along the line, accumulate the count of quotes, and see */
 31.1042 +        /* if this is an empty line - i.e. a line with nothing on it    */
 31.1043 +        /* but spaces.                                                  */
 31.1044 +        /* V .12 also if line has just spaces, * and/or - on it, don't  */
 31.1045 +        /* count it, since empty lines with asterisks or dashes to      */
 31.1046 +        /* separate sections are common.                                */
 31.1047 +        /* V .15 new single-quote checking - has to be better than the  */
 31.1048 +        /* previous version, but how much better? fingers crossed!      */
 31.1049 +        /* V .20 add period to * and - as characters on a separator line*/
 31.1050 +        s = aline;
 31.1051 +        while (*s) {
 31.1052 +            if (*s == CHAR_DQUOTE) quot++;
 31.1053 +            if (*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
 31.1054 +                if (s == aline) { /* at start of line, it can only be an openquote */
 31.1055 +                    if (strncmp(s+2, "tis", 3) && strncmp(s+2, "Tis", 3)) /* hardcode a very common exception! */
 31.1056 +                        open_single_quote++;
 31.1057 +                    }
 31.1058 +                else
 31.1059 +                    if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
 31.1060 +                        ; /* do nothing! - it's definitely an apostrophe, not a quote */
 31.1061 +                    else        /* it's outside a word - let's check it out */
 31.1062 +                        if (*s == CHAR_OPEN_SQUOTE || gcisalpha(*(s+1))) { /* it damwell better BE an openquote */
 31.1063 +                            if (strncmp(s+1, "tis", 3) && strncmp(s+1, "Tis", 3)) /* hardcode a very common exception! */
 31.1064 +                                open_single_quote++;
 31.1065 +                            }
 31.1066 +                        else { /* now - is it a closequote? */
 31.1067 +                            guessquote = 0;   /* accumulate clues */
 31.1068 +                            if (gcisalpha(*(s-1))) { /* it follows a letter - could be either */
 31.1069 +                                guessquote += 1;
 31.1070 +                                if (*(s-1) == 's') { /* looks like a plural apostrophe */
 31.1071 +                                    guessquote -= 3;
 31.1072 +                                    if (*(s+1) == CHAR_SPACE)  /* bonus marks! */
 31.1073 +                                        guessquote -= 2;
 31.1074 +                                    }
 31.1075 +                                }
 31.1076 +                            else /* it doesn't have a letter either side */
 31.1077 +                                if (strchr(".?!,;:", *(s-1)) && (strchr(".?!,;: ", *(s+1))))
 31.1078 +                                    guessquote += 8; /* looks like a closequote */
 31.1079 +                                else
 31.1080 +                                    guessquote += 1;
 31.1081 +                            if (open_single_quote > close_single_quote)
 31.1082 +                                guessquote += 1; /* give it the benefit of some doubt - if a squote is already open */
 31.1083 +                            else
 31.1084 +                                guessquote -= 1;
 31.1085 +                            if (guessquote >= 0)
 31.1086 +                                close_single_quote++;
 31.1087 +                            }
 31.1088 +
 31.1089 +            if (*s != CHAR_SPACE
 31.1090 +                && *s != '-'
 31.1091 +                && *s != '.'
 31.1092 +                && *s != CHAR_ASTERISK
 31.1093 +                && *s != 13
 31.1094 +                && *s != 10) isemptyline = 0;  /* ignore lines like  *  *  *  as spacers */
 31.1095 +            if (*s == CHAR_UNDERSCORE) c_unders++;
 31.1096 +            if (*s == CHAR_OPEN_CBRACK) c_brack++;
 31.1097 +            if (*s == CHAR_CLOSE_CBRACK) c_brack--;
 31.1098 +            if (*s == CHAR_OPEN_RBRACK) r_brack++;
 31.1099 +            if (*s == CHAR_CLOSE_RBRACK) r_brack--;
 31.1100 +            if (*s == CHAR_OPEN_SBRACK) s_brack++;
 31.1101 +            if (*s == CHAR_CLOSE_SBRACK) s_brack--;
 31.1102 +            s++;
 31.1103 +            }
 31.1104 +
 31.1105 +        if (isnewpara && !isemptyline) {   /* This line is the start of a new paragraph */
 31.1106 +            start_para_line = linecnt;
 31.1107 +            strncpy(parastart, aline, 80); /* Capture its first line in case we want to report it later */
 31.1108 +            parastart[79] = 0;
 31.1109 +            dquotepar = squotepar = 0; /* restart the quote count 0.98 */
 31.1110 +            s = aline;
 31.1111 +            while (!gcisalpha(*s) && !gcisdigit(*s) && *s) s++;    /* V.97 fixed bug - overran line and gave false warning - rare */
 31.1112 +            if (*s >= 'a' && *s <='z') { /* and its first letter is lowercase */
 31.1113 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1114 +                if (!pswit[OVERVIEW_SWITCH])
 31.1115 +                    printf("    Line %ld column %d - Paragraph starts with lower-case\n", linecnt, (int)(s - aline) +1);
 31.1116 +                else
 31.1117 +                    cnt_punct++;
 31.1118 +                }
 31.1119 +            isnewpara = 0; /* Signal the end of new para processing */
 31.1120 +            }
 31.1121 +
 31.1122 +        /* Check for an em-dash broken at line end */
 31.1123 +        if (enddash && *aline == '-') {
 31.1124 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1125 +            if (!pswit[OVERVIEW_SWITCH])
 31.1126 +                printf("    Line %ld column 1 - Broken em-dash?\n", linecnt);
 31.1127 +            else
 31.1128 +                cnt_punct++;
 31.1129 +            }
 31.1130 +        enddash = 0;
 31.1131 +        for (s = aline + strlen(aline) - 1; *s == ' ' && s > aline; s--);
 31.1132 +        if (s >= aline && *s == '-')
 31.1133 +            enddash = 1;
 31.1134 +            
 31.1135 +
 31.1136 +        /* Check for invalid or questionable characters in the line */
 31.1137 +        /* Anything above 127 is invalid for plain ASCII,  and      */
 31.1138 +        /* non-printable control characters should also be flagged. */
 31.1139 +        /* Tabs should generally not be there.                      */
 31.1140 +        /* Jan 06, in 0.99: Hm. For some strange reason, I either   */
 31.1141 +        /* never created or deleted the check for unprintable       */
 31.1142 +        /* control characters. They should be reported even if      */
  31.1144 +        /* warn_bin is off, I think, and in full.                   */
 31.1144 +
 31.1145 +        for (s = aline; *s; s++) {
 31.1146 +            i = (unsigned char) *s;
 31.1147 +            if (i < CHAR_SPACE && i != CHAR_LF && i != CHAR_CR && i != CHAR_TAB) {
 31.1148 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1149 +                if (!pswit[OVERVIEW_SWITCH])
 31.1150 +                    printf("    Line %ld column %d - Control character %d\n", linecnt, (int) (s - aline) + 1, i);
 31.1151 +                else
 31.1152 +                    cnt_bin++;
 31.1153 +                }
 31.1154 +            }
 31.1155 +
 31.1156 +        if (warn_bin) {
 31.1157 +            eNon_A = eTab = eTilde = eCarat = eFSlash = eAst = 0;  /* don't repeat multiple warnings on one line */
 31.1158 +            for (s = aline; *s; s++) {
 31.1159 +                if (!eNon_A && ((*s < CHAR_SPACE && *s != 9 && *s != '\n') || (unsigned char)*s > 127)) {
 31.1160 +                    i = *s;                           /* annoying kludge for signed chars */
 31.1161 +                    if (i < 0) i += 256;
 31.1162 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1163 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1164 +                        if (i > 127 && i < 160)
 31.1165 +                            printf("    Line %ld column %d - Non-ISO-8859 character %d\n", linecnt, (int) (s - aline) + 1, i);
 31.1166 +                        else
 31.1167 +                            printf("    Line %ld column %d - Non-ASCII character %d\n", linecnt, (int) (s - aline) + 1, i);
 31.1168 +                    else
 31.1169 +                        cnt_bin++;
 31.1170 +                    eNon_A = 1;
 31.1171 +                    }
 31.1172 +                if (!eTab && *s == CHAR_TAB) {
 31.1173 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1174 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1175 +                        printf("    Line %ld column %d - Tab character?\n", linecnt, (int) (s - aline) + 1);
 31.1176 +                    else
 31.1177 +                        cnt_odd++;
 31.1178 +                    eTab = 1;
 31.1179 +                    }
 31.1180 +                if (!eTilde && *s == CHAR_TILDE) {  /* often used by OCR software to indicate an unrecognizable character */
 31.1181 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1182 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1183 +                        printf("    Line %ld column %d - Tilde character?\n", linecnt, (int) (s - aline) + 1);
 31.1184 +                    else
 31.1185 +                        cnt_odd++;
 31.1186 +                    eTilde = 1;
 31.1187 +                    }
 31.1188 +                if (!eCarat && *s == CHAR_CARAT) {  
 31.1189 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1190 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1191 +                        printf("    Line %ld column %d - Carat character?\n", linecnt, (int) (s - aline) + 1);
 31.1192 +                    else
 31.1193 +                        cnt_odd++;
 31.1194 +                    eCarat = 1;
 31.1195 +                    }
 31.1196 +                if (!eFSlash && *s == CHAR_FORESLASH && warn_fslash) {  
 31.1197 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1198 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1199 +                        printf("    Line %ld column %d - Forward slash?\n", linecnt, (int) (s - aline) + 1);
 31.1200 +                    else
 31.1201 +                        cnt_odd++;
 31.1202 +                    eFSlash = 1;
 31.1203 +                    }
 31.1204 +                /* report asterisks only in paranoid mode, since they're often deliberate */
 31.1205 +                if (!eAst && pswit[PARANOID_SWITCH] && warn_ast && !isemptyline && *s == CHAR_ASTERISK) {
 31.1206 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1207 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1208 +                        printf("    Line %ld column %d - Asterisk?\n", linecnt, (int) (s - aline) + 1);
 31.1209 +                    else
 31.1210 +                        cnt_odd++;
 31.1211 +                    eAst = 1;
 31.1212 +                    }
 31.1213 +                }
 31.1214 +            }
 31.1215 +
 31.1216 +        /* Check for line too long */
 31.1217 +        if (warn_long) {
 31.1218 +            if (strlen(aline) > LONGEST_PG_LINE) {
 31.1219 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1220 +                if (!pswit[OVERVIEW_SWITCH])
 31.1221 +                    printf("    Line %ld column %d - Long line %d\n", linecnt, strlen(aline), strlen(aline));
 31.1222 +                else
 31.1223 +                    cnt_long++;
 31.1224 +                }
 31.1225 +            }
 31.1226 +
 31.1227 +        /* Check for line too short.                                     */
 31.1228 +        /* This one is a bit trickier to implement: we don't want to     */
 31.1229 +        /* flag the last line of a paragraph for being short, so we      */
 31.1230 +        /* have to wait until we know that our current line is a         */
 31.1231 +        /* "normal" line, then report the _previous_ line if it was too  */
 31.1232 +        /* short. We also don't want to report indented lines like       */
 31.1233 +        /* chapter heads or formatted quotations. We therefore keep      */
 31.1234 +        /* lastlen as the length of the last line examined, and          */
 31.1235 +        /* lastblen as the length of the last but one, and try to        */
 31.1236 +        /* suppress unnecessary warnings by checking that both were of   */
 31.1237 +        /* "normal" length. We keep the first character of the last      */
 31.1238 +        /* line in laststart, and if it was a space, we assume that the  */
 31.1239 +        /* formatting is deliberate. I can't figure out a way to         */
 31.1240 +        /* distinguish something like a quoted verse left-aligned or     */
 31.1241 +        /* the header or footer of a letter from a paragraph of short    */
 31.1242 +        /* lines - maybe if I examined the whole paragraph, and if the   */
 31.1243 +        /* para has less than, say, 8 lines and if all lines are short,  */
 31.1244 +        /* then just assume it's OK? Need to look at some texts to see   */
 31.1245 +        /* how often a formula like this would get the right result.     */
 31.1246 +        /* V0.99 changed the tolerance for length to ignore from 2 to 1  */
 31.1247 +        if (warn_short) {
 31.1248 +            if (strlen(aline) > 1
 31.1249 +                && lastlen > 1 && lastlen < SHORTEST_PG_LINE
 31.1250 +                && lastblen > 1 && lastblen > SHORTEST_PG_LINE
 31.1251 +                && laststart != CHAR_SPACE) {
 31.1252 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
 31.1253 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1254 +                        printf("    Line %ld column %d - Short line %d?\n", linecnt-1, strlen(prevline), strlen(prevline));
 31.1255 +                    else
 31.1256 +                        cnt_short++;
 31.1257 +                    }
 31.1258 +            }
 31.1259 +        lastblen = lastlen;
 31.1260 +        lastlen = strlen(aline);
 31.1261 +        laststart = aline[0];
 31.1262 +
 31.1263 +        /* look for punctuation at start of line */
 31.1264 +        if  (*aline && strchr(".?!,;:",  aline[0]))  {            /* if it's punctuation */
 31.1265 +            if (strncmp(". . .", aline, 5)) {   /* exception for ellipsis: V.98 tightened up to except only a full ellipsis */
 31.1266 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1267 +                if (!pswit[OVERVIEW_SWITCH])
 31.1268 +                    printf("    Line %ld column 1 - Begins with punctuation?\n", linecnt);
 31.1269 +                else
 31.1270 +                    cnt_punct++;
 31.1271 +                }
 31.1272 +            }
 31.1273 +
 31.1274 +        /* Check for spaced em-dashes                            */
 31.1275 +        /* V.20 must check _all_ occurrences of "--" on the line */
 31.1276 +        /* hence the loop - even if the first double-dash is OK  */
 31.1277 +        /* there may be another that's wrong later on.           */
 31.1278 +        if (warn_dash) {
 31.1279 +            s = aline;
 31.1280 +            while (strstr(s,"--")) {
 31.1281 +                if (*(strstr(s, "--")-1) == CHAR_SPACE ||
 31.1282 +                   (*(strstr(s, "--")+2) == CHAR_SPACE)) {
 31.1283 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1284 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1285 +                        printf("    Line %ld column %d - Spaced em-dash?\n", linecnt, (int) (strstr(s,"--") - aline) + 1);
 31.1286 +                    else
 31.1287 +                        cnt_dash++;
 31.1288 +                    }
 31.1289 +                s = strstr(s,"--") + 2;
 31.1290 +                }
 31.1291 +            }
 31.1292 +
 31.1293 +        /* Check for spaced dashes */
 31.1294 +        if (warn_dash)
 31.1295 +            if (strstr(aline," -")) {
 31.1296 +                if (*(strstr(aline, " -")+2) != '-') {
 31.1297 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1298 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1299 +                        printf("    Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline," -") - aline) + 1);
 31.1300 +                    else
 31.1301 +                        cnt_dash++;
 31.1302 +                    }
 31.1303 +                }
 31.1304 +            else
 31.1305 +                if (strstr(aline,"- ")) {
 31.1306 +                    if (*(strstr(aline, "- ")-1) != '-') {
 31.1307 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1308 +                        if (!pswit[OVERVIEW_SWITCH])
 31.1309 +                            printf("    Line %ld column %d - Spaced dash?\n", linecnt, (int) (strstr(aline,"- ") - aline) + 1);
 31.1310 +                        else
 31.1311 +                            cnt_dash++;
 31.1312 +                        }
 31.1313 +                    }
 31.1314 +
 31.1315 +        /* v 0.99                                                       */
 31.1316 +        /* Check for unmarked paragraphs indicated by separate speakers */
 31.1317 +        /* May well be false positive:                                  */
 31.1318 +        /* "Bravo!" "Wonderful!" called the crowd.                      */
 31.1319 +        /* but useful all the same.                                     */
 31.1320 +        s = wrk;
 31.1321 +        *s = 0;
 31.1322 +        if (strstr(aline, "\" \"")) s = strstr(aline, "\" \"");
 31.1323 +        if (strstr(aline, "\"  \"")) s = strstr(aline, "\"  \"");
 31.1324 +        if (*s) {
 31.1325 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1326 +            if (!pswit[OVERVIEW_SWITCH])
 31.1327 +                printf("    Line %ld column %d - Query missing paragraph break?\n", linecnt, (int)(s - aline) +1);
 31.1328 +            else
 31.1329 +                cnt_punct++;
 31.1330 +            }
 31.1331 +
 31.1332 +
 31.1333 +
 31.1334 +        /* Check for "to he" and other easy he/be errors          */
 31.1335 +        /* This is a very inadequate effort on the he/be problem, */
 31.1336 +        /* but the phrase "to he" is always an error, whereas "to */
 31.1337 +        /* be" is quite common. I chuckle when it does catch one! */
 31.1338 +        /* Similarly, '"Quiet!", be said.' is a non-be error      */
 31.1339 +        /* V .18 - "to he" is _not_ always an error!:             */
 31.1340 +        /*           "Where they went to he couldn't say."        */
 31.1341 +        /* but I'm leaving it in anyway.                          */
 31.1342 +        /* V .20 Another false positive:                          */
 31.1343 +        /*       What would "Cinderella" be without the . . .     */
 31.1344 +        /* and another "If he wants to he can see for himself."   */
 31.1345 +        /* V .21 Added " is be " and " be is " and " be was "     */
 31.1346 +        /* V .99 Added jeebies code -- removed again.             */
 31.1347 +        /*       Is jeebies code worth adding? Rare to see he/be  */
 31.1348 +        /*       errors with modern OCR. Separate program? Yes!   */
 31.1349 +        /*       jeebies does the job without cluttering up this. */
 31.1350 +        /*       We do get a few more queryable pairs from the    */
 31.1351 +        /*       project though -- they're cheap to implement.    */
 31.1352 +        /*       Also added a column number for guiguts.          */
 31.1353 +
 31.1354 +        s = wrk;
 31.1355 +        *s = 0;
 31.1356 +        if (strstr(aline," to he ")) s = strstr(aline," to he ");
 31.1357 +        if (strstr(aline,"\" be ")) s = strstr(aline,"\" be ");
 31.1358 +        if (strstr(aline,"\", be ")) s = strstr(aline,"\", be ");
 31.1359 +        if (strstr(aline," is be ")) s = strstr(aline," is be ");
 31.1360 +        if (strstr(aline," be is ")) s = strstr(aline," be is ");
 31.1361 +        if (strstr(aline," was be ")) s = strstr(aline," was be ");
 31.1362 +        if (strstr(aline," be would ")) s = strstr(aline," be would ");
 31.1363 +        if (strstr(aline," be could ")) s = strstr(aline," be could ");
 31.1364 +        if (*s) {
 31.1365 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1366 +            if (!pswit[OVERVIEW_SWITCH])
 31.1367 +                printf("    Line %ld column %d - Query he/be error?\n", linecnt, (int)(s - aline) +1);
 31.1368 +            else
 31.1369 +                cnt_word++;
 31.1370 +            }
 31.1371 +
 31.1372 +        s = wrk;
 31.1373 +        *s = 0;
 31.1374 +        if (strstr(aline," i bad ")) s = strstr(aline," i bad ");
 31.1375 +        if (strstr(aline," you bad ")) s = strstr(aline," you bad ");
 31.1376 +        if (strstr(aline," he bad ")) s = strstr(aline," he bad ");
 31.1377 +        if (strstr(aline," she bad ")) s = strstr(aline," she bad ");
 31.1378 +        if (strstr(aline," they bad ")) s = strstr(aline," they bad ");
 31.1379 +        if (strstr(aline," a had ")) s = strstr(aline," a had ");
 31.1380 +        if (strstr(aline," the had ")) s = strstr(aline," the had ");
 31.1381 +        if (*s) {
 31.1382 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1383 +            if (!pswit[OVERVIEW_SWITCH])
 31.1384 +                printf("    Line %ld column %d - Query had/bad error?\n", linecnt, (int)(s - aline) +1);
 31.1385 +            else
 31.1386 +                cnt_word++;
 31.1387 +            }
 31.1388 +
 31.1389 +
 31.1390 +        /* V .97 Added ", hut "  Not too common, hut pretty certain   */
 31.1391 +        /* V.99 changed to add a column number for guiguts            */
 31.1392 +        s = wrk;
 31.1393 +        *s = 0;
 31.1394 +        if (strstr(aline,", hut ")) s = strstr(aline,", hut ");
 31.1395 +        if (strstr(aline,"; hut ")) s = strstr(aline,"; hut ");
 31.1396 +        if (*s) {
 31.1397 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1398 +            if (!pswit[OVERVIEW_SWITCH])
 31.1399 +                printf("    Line %ld column %d - Query hut/but error?\n", linecnt, (int)(s - aline) +1);
 31.1400 +            else
 31.1401 +                cnt_word++;
 31.1402 +            }
 31.1403 +
 31.1404 +        /* Special case - angled bracket in front of "From" placed there by an MTA */
 31.1405 +        /* when sending an e-mail.  V .21                                          */
 31.1406 +        if (strstr(aline, ">From")) {
 31.1407 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1408 +            if (!pswit[OVERVIEW_SWITCH])
 31.1409 +                printf("    Line %ld column %d - Query angled bracket with From\n", linecnt, (int)(strstr(aline, ">From") - aline) +1);
 31.1410 +            else
 31.1411 +                cnt_punct++;
 31.1412 +            }
 31.1413 +
 31.1414 +        /* V 0.98 Check for a single character line - often an overflow from bad wrapping. */
 31.1415 +        if (*aline && !*(aline+1)) {
 31.1416 +            if (*aline == 'I' || *aline == 'V' || *aline == 'X' || *aline == 'L' || gcisdigit(*aline))
 31.1417 +                ; /* nothing - ignore numerals alone on a line. */
 31.1418 +            else {
 31.1419 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1420 +                if (!pswit[OVERVIEW_SWITCH])
 31.1421 +                    printf("    Line %ld column 1 - Query single character line\n", linecnt);
 31.1422 +                else
 31.1423 +                    cnt_punct++;
 31.1424 +                }
 31.1425 +            }
 31.1426 +
 31.1427 +        /* V 0.98 Check for I" - often should be ! (column printed is the 0-based offset of the leading space) */
 31.1428 +        if (strstr(aline, " I\"")) {
 31.1429 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1430 +            if (!pswit[OVERVIEW_SWITCH])
 31.1431 +                printf("    Line %ld column %ld - Query I=exclamation mark?\n", linecnt, (long)(strstr(aline, " I\"") - aline)); /* ptrdiff_t is not long everywhere: cast to match %ld */
 31.1432 +            else
 31.1433 +                cnt_punct++;
 31.1434 +            }
 31.1435 +
 31.1436 +        /* V 0.98 Check for period without a capital letter. Cut-down from gutspell */
 31.1437 +        /*        Only works when it happens on a single line.                      */
 31.1438 +
 31.1439 +        if (pswit[PARANOID_SWITCH])
 31.1440 +            for (t = s = aline; strstr(t,". ");) {
 31.1441 +                t = strstr(t, ". ");
 31.1442 +                if (t == s)  {
 31.1443 +                    t++;
 31.1444 +                    continue; /* start of line punctuation is handled elsewhere */
 31.1445 +                    }
 31.1446 +                if (!gcisalpha(*(t-1))) {
 31.1447 +                    t++;
 31.1448 +                    continue;
 31.1449 +                    }
 31.1450 +                if (isDutch) {  /* For Frank & Jeroen -- 's Middags case */
 31.1451 +                    if (*(t+2) == CHAR_SQUOTE &&
 31.1452 +                      *(t+3)>='a' && *(t+3)<='z' &&
 31.1453 +                      *(t+4) == CHAR_SPACE &&
 31.1454 +                      *(t+5)>='A' && *(t+5)<='Z') {
 31.1455 +                        t++;
 31.1456 +                        continue;
 31.1457 +                        }
 31.1458 +                      }
 31.1459 +                s1 = t+2;
 31.1460 +                while (*s1 && !gcisalpha(*s1) && !gcisdigit(*s1)) /* gcisdigit, as used below: isdigit() on a plain char is undefined for negative values */
 31.1461 +                    s1++;
 31.1462 +                if (*s1 >= 'a' && *s1 <= 'z') {  /* we have something to investigate */
 31.1463 +                    istypo = 1;
 31.1464 +                    for (s1 = t - 1; s1 >= s && 
 31.1465 +                        (gcisalpha(*s1) || gcisdigit(*s1) || 
 31.1466 +                        (*s1 == CHAR_SQUOTE && s1 > s && gcisalpha(*(s1+1)) && gcisalpha(*(s1-1)))); s1--); /* so let's go back and find out; s1 > s keeps *(s1-1) inside aline */
 31.1467 +                    s1++;
 31.1468 +                    for (i = 0; *s1 && *s1 != '.'; s1++, i++)
 31.1469 +                        testword[i] = *s1;
 31.1470 +                    testword[i] = 0;
 31.1471 +                    for (i = 0; *abbrev[i]; i++)
 31.1472 +                        if (!strcmp(testword, abbrev[i]))
 31.1473 +                            istypo = 0;
 31.1474 +//                    if (*testword >= 'A' && *testword <= 'Z') 
 31.1475 +//                        istypo = 0;
 31.1476 +                    if (gcisdigit(*testword)) istypo = 0;
 31.1477 +                    if (!*(testword+1)) istypo = 0;
 31.1478 +                    if (isroman(testword)) istypo = 0;
 31.1479 +                    if (istypo) {
 31.1480 +                        istypo = 0;
 31.1481 +                        for (i = 0; testword[i]; i++)
 31.1482 +                            if (strchr(vowels, testword[i]))
 31.1483 +                                istypo = 1;
 31.1484 +                        }
 31.1485 +                    if (istypo) {
 31.1486 +                        isdup = 0;
 31.1487 +                        if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
 31.1488 +                            for (i = 0; i < qperiod_index; i++)
 31.1489 +                                if (!strcmp(testword, qperiod[i])) {
 31.1490 +                                    isdup = 1;
 31.1491 +                                    }
 31.1492 +                        if (!isdup) {
 31.1493 +                            if (qperiod_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
 31.1494 +                                strcpy(qperiod[qperiod_index], testword);
 31.1495 +                                qperiod_index++;
 31.1496 +                                }
 31.1497 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1498 +                            if (!pswit[OVERVIEW_SWITCH])
 31.1499 +                                printf("    Line %ld column %d - Extra period?\n", linecnt, (int)(t - aline)+1);
 31.1500 +                            else
 31.1501 +                                cnt_punct++;
 31.1502 +                            }
 31.1503 +                        }
 31.1504 +                    }
 31.1505 +                t++;
 31.1506 +                }
 31.1507 +
 31.1508 +
 31.1509 +        if (pswit[TYPO_SWITCH]) {    /* Should have put this condition in at the start of 0.99. Duh! */
 31.1510 +            /* Check for words usually not followed by punctuation 0.99 */
 31.1511 +            for (s = aline; *s;) {
 31.1512 +                wordstart = s;
 31.1513 +                s = getaword(s, inword);
 31.1514 +                if (!*inword) continue;
 31.1515 +                lowerit(inword);
 31.1516 +                for (i = 0; *nocomma[i]; i++)
 31.1517 +                    if (!strcmp(inword, nocomma[i])) {
 31.1518 +                        if (*s == ',' || *s == ';' || *s == ':') {
 31.1519 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1520 +                            if (!pswit[OVERVIEW_SWITCH])
 31.1521 +                                printf("    Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
 31.1522 +                            else
 31.1523 +                                cnt_punct++;
 31.1524 +                            }
 31.1525 +                        }
 31.1526 +                for (i = 0; *noperiod[i]; i++)
 31.1527 +                    if (!strcmp(inword, noperiod[i])) {
 31.1528 +                        if (*s == '.' || *s == '!') {
 31.1529 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1530 +                            if (!pswit[OVERVIEW_SWITCH])
 31.1531 +                                printf("    Line %ld column %d - Query punctuation after %s?\n", linecnt, (int)(s - aline)+1, inword);
 31.1532 +                            else
 31.1533 +                                cnt_punct++;
 31.1534 +                            }
 31.1535 +                        }
 31.1536 +                }
 31.1537 +            }
 31.1538 +
 31.1539 +
 31.1540 +
 31.1541 +        /* Check for commonly mistyped words, and digits like 0 for O in a word */
 31.1542 +        for (s = aline; *s;) {
 31.1543 +            wordstart = s;
 31.1544 +            s = getaword(s, inword);
 31.1545 +            if (!*inword) continue; /* don't bother with empty lines */
 31.1546 +            if (mixdigit(inword)) {
 31.1547 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1548 +                if (!pswit[OVERVIEW_SWITCH])
 31.1549 +                    printf("    Line %ld column %d - Query digit in %s\n", linecnt, (int)(wordstart - aline) + 1, inword); /* the column argument is an int, so it needs %d, not %ld */
 31.1550 +                else
 31.1551 +                    cnt_word++;
 31.1552 +                }
 31.1553 +
 31.1554 +            /* put the word through a series of tests for likely typos and OCR errors */
 31.1555 +            /* V.21 I had allowed lots of typo-checking even with the typo switch     */
 31.1556 +            /* turned off, but I really should disallow reporting of them when        */
 31.1557 +            /* the switch is off. Hence the "if" below.                               */
 31.1558 +            if (pswit[TYPO_SWITCH]) {
 31.1559 +                istypo = 0;
 31.1560 +                strcpy(testword, inword);
 31.1561 +                alower = 0;
 31.1562 +                for (i = 0; i < (signed int)strlen(testword); i++) { /* lowercase for testing */
 31.1563 +                    if (testword[i] >= 'a' && testword[i] <= 'z') alower = 1;
 31.1564 +                    if (alower && testword[i] >= 'A' && testword[i] <= 'Z') {
 31.1565 +                        /* we have an uppercase mid-word. However, there are common cases: */
 31.1566 +                        /*   Mac and Mc like McGill                                        */
 31.1567 +                        /*   French contractions like l'Abbe                               */
 31.1568 +                        if ((i == 2 && testword[0] == 'm' && testword[1] == 'c') ||
 31.1569 +                            (i == 3 && testword[0] == 'm' && testword[1] == 'a' && testword[2] == 'c') ||
 31.1570 +                            (i > 0 && testword[i-1] == CHAR_SQUOTE))
 31.1571 +                                ; /* do nothing! */
 31.1572 +
 31.1573 +                        else {  /* V.97 - remove separate case of uppercase within word so that         */
 31.1574 +                                /* names like VanAllen fall into qword_index and get reported only once */
 31.1575 +                            istypo = 1;
 31.1576 +                            }
 31.1577 +                        }
 31.1578 +                    testword[i] = (char)tolower(testword[i]);
 31.1579 +                    }
 31.1580 +
 31.1581 +                /* check for certain unlikely two-letter combinations at word start and end */
 31.1582 +                /* V.0.97 - this replaces individual hardcoded checks in previous versions */
 31.1583 +                if (strlen(testword) > 1) {
 31.1584 +                    for (i = 0; *nostart[i]; i++)
 31.1585 +                        if (!strncmp(testword, nostart[i], 2))
 31.1586 +                            istypo = 1;
 31.1587 +                    for (i = 0; *noend[i]; i++)
 31.1588 +                        if (!strncmp(testword + strlen(testword) -2, noend[i], 2))
 31.1589 +                            istypo = 1;
 31.1590 +                    }
 31.1591 +
 31.1592 +
 31.1593 +                /* ght is common, gbt never. Like that. */
 31.1594 +                if (strstr(testword, "cb")) istypo = 1;
 31.1595 +                if (strstr(testword, "gbt")) istypo = 1;
 31.1596 +                if (strstr(testword, "pbt")) istypo = 1;
 31.1597 +                if (strstr(testword, "tbs")) istypo = 1;
 31.1598 +                if (strstr(testword, "mrn")) istypo = 1;
 31.1599 +                if (strstr(testword, "ahle")) istypo = 1;
 31.1600 +                if (strstr(testword, "ihle")) istypo = 1;
 31.1601 +
 31.1602 +                /* "TBE" does happen - like HEARTBEAT - but uncommon.                    */
 31.1603 +                /*  Also "TBI" - frostbite, outbid - but uncommon.                       */
 31.1604 +                /*  Similarly "ii" like Hawaii, or Pompeii, and in Roman numerals,       */
 31.1605 +                /*  but these are covered in V.20. "ii" is a common scanno.              */
 31.1606 +                if (strstr(testword, "tbi")) istypo = 1;
 31.1607 +                if (strstr(testword, "tbe")) istypo = 1;
 31.1608 +                if (strstr(testword, "ii")) istypo = 1;
 31.1609 +
 31.1610 +                /* check for no vowels or no consonants. */
 31.1611 +                /* If none, flag a typo                  */
 31.1612 +                if (!istypo && strlen(testword)>1) {
 31.1613 +                    vowel = consonant = 0;
 31.1614 +                    for (i = 0; testword[i]; i++)
 31.1615 +                        if (testword[i] == 'y' || gcisdigit(testword[i])) {  /* Yah, this is loose. */
 31.1616 +                            vowel++;
 31.1617 +                            consonant++;
 31.1618 +                            }
 31.1619 +                        else
 31.1620 +                            if  (strchr(vowels, testword[i])) vowel++;
 31.1621 +                            else consonant++;
 31.1622 +                    if (!vowel || !consonant) {
 31.1623 +                        istypo = 1;
 31.1624 +                        }
 31.1625 +                    }
 31.1626 +
 31.1627 +                /* now exclude the word from being reported if it's in */
 31.1628 +                /* the okword list                                     */
 31.1629 +                for (i = 0; *okword[i]; i++)
 31.1630 +                    if (!strcmp(testword, okword[i]))
 31.1631 +                        istypo = 0;
 31.1632 +
 31.1633 +                /* what looks like a typo may be a Roman numeral. Exclude these */
 31.1634 +                if (istypo)
 31.1635 +                    if (isroman(testword))
 31.1636 +                        istypo = 0;
 31.1637 +
 31.1638 +                /* check the manual list of typos */
 31.1639 +                if (!istypo)
 31.1640 +                    for (i = 0; *typo[i]; i++)
 31.1641 +                        if (!strcmp(testword, typo[i]))
 31.1642 +                            istypo = 1;
 31.1643 +
 31.1644 +
 31.1645 +                /* V.21 - check lowercase s and l - special cases */
 31.1646 +                /* V.98 - added "i" and "m"                       */
 31.1647 +                /* V.99 - added "j" often a semi-colon gone wrong */
 31.1648 +                /*      - and "d" for a missing apostrophe - he d */
 31.1649 +                /*      - and "n" for "in"                        */
 31.1650 +                if (!istypo && strlen(testword) == 1)
 31.1651 +                    if (strchr("slmijdn", *inword))
 31.1652 +                        istypo = 1;
 31.1653 +
 31.1654 +
 31.1655 +                if (istypo) {
 31.1656 +                    isdup = 0;
 31.1657 +                    if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
 31.1658 +                        for (i = 0; i < qword_index; i++)
 31.1659 +                            if (!strcmp(testword, qword[i])) {
 31.1660 +                                isdup = 1;
 31.1661 +                                ++dupcnt[i];
 31.1662 +                                }
 31.1663 +                    if (!isdup) {
 31.1664 +                        if (qword_index < MAX_QWORD && strlen(testword) < MAX_QWORD_LENGTH) {
 31.1665 +                            strcpy(qword[qword_index], testword);
 31.1666 +                            qword_index++;
 31.1667 +                            }
 31.1668 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1669 +                        if (!pswit[OVERVIEW_SWITCH]) {
 31.1670 +                            printf("    Line %ld column %d - Query word %s", linecnt, (int)(wordstart - aline) + 1, inword);
 31.1671 +                            if (strlen(testword) < MAX_QWORD_LENGTH && !pswit[VERBOSE_SWITCH])
 31.1672 +                                printf(" - not reporting duplicates");
 31.1673 +                            printf("\n");
 31.1674 +                            }
 31.1675 +                        else
 31.1676 +                            cnt_word++;
 31.1677 +                        }
 31.1678 +                    }
 31.1679 +                }        /* end of typo-checking */
 31.1680 +
 31.1681 +                /* check the user's list of typos. NOTE(review): this runs after the close of the TYPO_SWITCH block above, so istypo and testword are only freshly set when that switch is on; nothing is counted in overview mode -- confirm intended */
 31.1682 +                if (!istypo)
 31.1683 +                    if (usertypo_count)
 31.1684 +                        for (i = 0; i < usertypo_count; i++)
 31.1685 +                            if (!strcmp(testword, usertypo[i])) {
 31.1686 +                                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1687 +                                if (!pswit[OVERVIEW_SWITCH])  
 31.1688 +                                    printf("    Line %ld column %d - Query possible scanno %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
 31.1689 +                                }
 31.1690 +
 31.1691 +
 31.1692 +
 31.1693 +            if (pswit[PARANOID_SWITCH] && warn_digit) {   /* in paranoid mode, query all 0 and 1 standing alone - added warn_digit V.97*/
 31.1694 +                if (!strcmp(inword, "0") || !strcmp(inword, "1")) {
 31.1695 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1696 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1697 +                        printf("    Line %ld column %d - Query standalone %s\n", linecnt, (int)(wordstart - aline) + 2, inword);
 31.1698 +                    else
 31.1699 +                        cnt_word++;
 31.1700 +                    }
 31.1701 +                }
 31.1702 +            }
 31.1703 +
 31.1704 +        /* look for added or missing spaces around punctuation and quotes */
 31.1705 +        /* If there is a punctuation character like ! with no space on    */
 31.1706 +        /* either side, suspect a missing!space. If there are spaces on   */
 31.1707 +        /* both sides , assume a typo. If we see a double quote with no   */
 31.1708 +        /* space or punctuation on either side of it, assume unspaced     */
 31.1709 +        /* quotes "like"this.                                             */
 31.1710 +        llen = strlen(aline);
 31.1711 +        for (i = 1; i < llen; i++) {                               /* for each character in the line after the first */
 31.1712 +            if  (strchr(".?!,;:_", aline[i])) {                    /* if it's punctuation */
 31.1713 +                isacro = 0;                       /* we need to suppress warnings for acronyms like M.D. */
 31.1714 +                isellipsis = 0;                   /* we need to suppress warnings for ellipsis . . . */
 31.1715 +                if ( (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) ||     /* if there are letters on both sides of it or ... */
 31.1716 +                   (gcisalpha(aline[i+1]) && strchr("?!,;:", aline[i]))) { /* ...if it's strict punctuation followed by an alpha */
 31.1717 +                    if (aline[i] == '.') {
 31.1718 +                        if (i > 2)
 31.1719 +                            if (aline[i-2] == '.') isacro = 1;
 31.1720 +                        if (i + 2 < llen)
 31.1721 +                            if (aline[i+2] == '.') isacro = 1;
 31.1722 +                        }
 31.1723 +                    if (!isacro) {
 31.1724 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1725 +                        if (!pswit[OVERVIEW_SWITCH])
 31.1726 +                            printf("    Line %ld column %d - Missing space?\n", linecnt, i+1);
 31.1727 +                        else
 31.1728 +                            cnt_punct++;
 31.1729 +                        }
 31.1730 +                    }
 31.1731 +                if (aline[i-1] == CHAR_SPACE && (aline[i+1] == CHAR_SPACE || aline[i+1] == 0)) { /* if there are spaces on both sides, or space before and end of line */
 31.1732 +                    if (aline[i] == '.') {
 31.1733 +                        if (i > 2)
 31.1734 +                            if (aline[i-2] == '.') isellipsis = 1;
 31.1735 +                        if (i + 2 < llen)
 31.1736 +                            if (aline[i+2] == '.') isellipsis = 1;
 31.1737 +                        }
 31.1738 +                    if (!isemptyline && !isellipsis) {
 31.1739 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1740 +                        if (!pswit[OVERVIEW_SWITCH])
 31.1741 +                            printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
 31.1742 +                        else
 31.1743 +                            cnt_punct++;
 31.1744 +                        }
 31.1745 +                    }
 31.1746 +                }
 31.1747 +            }
 31.1748 +
 31.1749 +        /* 0.98 -- split out the characters that CANNOT be preceded by space */
 31.1750 +        llen = strlen(aline);
 31.1751 +        for (i = 1; i < llen; i++) {                             /* for each character in the line after the first */
 31.1752 +            if  (strchr("?!,;:", aline[i])) {                    /* if it's punctuation that _cannot_ have a space before it */
 31.1753 +                if (aline[i-1] == CHAR_SPACE && !isemptyline && aline[i+1] != CHAR_SPACE) { /* if aline[i+1) DOES == space, it was already reported just above */
 31.1754 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1755 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1756 +                        printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
 31.1757 +                    else
 31.1758 +                        cnt_punct++;
 31.1759 +                    }
 31.1760 +                }
 31.1761 +            }
 31.1762 +
 31.1763 +
 31.1764 +        /* 0.99 -- special case " .X" where X is any alpha. */
 31.1765 +        /* This plugs a hole in the acronym code above. Inelegant, but maintainable. */
 31.1766 +        llen = strlen(aline);
 31.1767 +        for (i = 1; i < llen; i++) {             /* for each character in the line after the first */
 31.1768 +            if  (aline[i] == '.') {              /* if it's a period */
 31.1769 +                if (aline[i-1] == CHAR_SPACE && gcisalpha(aline[i+1])) { /* if the period follows a space and is followed by a letter */
 31.1770 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1771 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1772 +                        printf("    Line %ld column %d - Spaced punctuation?\n", linecnt, i+1);
 31.1773 +                    else
 31.1774 +                        cnt_punct++;
 31.1775 +                    }
 31.1776 +                }
 31.1777 +            }
 31.1778 +
 31.1779 +
 31.1780 +
 31.1781 +
 31.1782 +        /* v.21 breaking out the search for unspaced doublequotes        */
 31.1783 +        /* This is not as efficient, but it's more maintainable          */
 31.1784 +        /* V.97 added underscore to the list of characters not to query, */
 31.1785 +        /* since underscores are commonly used as italics indicators.    */
 31.1786 +        /* V.98 Added slash as well, same reason.                        */
 31.1787 +        for (i = 1; i < llen; i++) {                               /* for each character in the line after the first */
 31.1788 +            if (aline[i] == CHAR_DQUOTE) {
 31.1789 +                if ((!strchr(" _-.'`,;:!/([{?}])",  aline[i-1]) &&
 31.1790 +                     !strchr(" _-.'`,;:!/([{?}])",  aline[i+1]) &&
 31.1791 +                     aline[i+1] != 0
 31.1792 +                     || (!strchr(" _-([{'`", aline[i-1]) && gcisalpha(aline[i+1])))) {
 31.1793 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1794 +                        if (!pswit[OVERVIEW_SWITCH])
 31.1795 +                            printf("    Line %ld column %d - Unspaced quotes?\n", linecnt, i+1);
 31.1796 +                        else
 31.1797 +                            cnt_punct++;
 31.1798 +                        }
 31.1799 +                }
 31.1800 +            }
 31.1801 +
 31.1802 +
 31.1803 +        /* v.98 check parity of quotes                             */
 31.1804 +        /* v.99 added !*(s+1) in some tests to catch "I am," he said, but I will not be soon". */
 31.1805 +        for (s = aline; *s; s++) {
 31.1806 +            if (*s == CHAR_DQUOTE) {
 31.1807 +                if (!(dquotepar = !dquotepar)) {    /* parity even */
 31.1808 +                    if (!strchr("_-.'`/,;:!?)]} ",  *(s+1))) {
 31.1809 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1810 +                        if (!pswit[OVERVIEW_SWITCH])
 31.1811 +                            printf("    Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
 31.1812 +                        else
 31.1813 +                            cnt_punct++;
 31.1814 +                        }
 31.1815 +                    }
 31.1816 +                else {                              /* parity odd */
 31.1817 +                    if ((!gcisalpha(*(s+1)) && !gcisdigit(*(s+1)) && !strchr("_-/.'`([{$",  *(s+1))) || !*(s+1)) { /* gcisdigit: isdigit() on a plain char is undefined for negative values */
 31.1818 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1819 +                        if (!pswit[OVERVIEW_SWITCH])
 31.1820 +                            printf("    Line %ld column %d - Wrongspaced quotes?\n", linecnt, (int)(s - aline)+1);
 31.1821 +                        else
 31.1822 +                            cnt_punct++;
 31.1823 +                        }
 31.1824 +                    }
 31.1825 +                }
 31.1826 +            }
 31.1827 +
 31.1828 +            if (*aline == CHAR_DQUOTE) {
 31.1829 +                if (strchr(",;:!?)]} ", aline[1])) {
 31.1830 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1831 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1832 +                        printf("    Line %ld column 1 - Wrongspaced quotes?\n", linecnt); /* column is fixed at 1, so no column argument is passed */
 31.1833 +                    else
 31.1834 +                        cnt_punct++;
 31.1835 +                    }
 31.1836 +                }
 31.1837 +
 31.1838 +        if (pswit[SQUOTE_SWITCH]) /* same parity test as for double quotes, applied to single quotes that are not flanked by letters on both sides */
 31.1839 +            for (s = aline; *s; s++) {
 31.1840 +                if ((*s == CHAR_SQUOTE || *s == CHAR_OPEN_SQUOTE)
 31.1841 +                     && ( s == aline || (s > aline && !gcisalpha(*(s-1))) || !gcisalpha(*(s+1)))) {
 31.1842 +                    if (!(squotepar = !squotepar)) {    /* parity even */
 31.1843 +                        if (!strchr("_-.'`/\",;:!?)]} ",  *(s+1))) {
 31.1844 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1845 +                            if (!pswit[OVERVIEW_SWITCH])
 31.1846 +                                printf("    Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
 31.1847 +                            else
 31.1848 +                                cnt_punct++;
 31.1849 +                            }
 31.1850 +                        }
 31.1851 +                    else {                              /* parity odd */
 31.1852 +                        if ((!gcisalpha(*(s+1)) && !gcisdigit(*(s+1)) && !strchr("_-/\".'`",  *(s+1))) || !*(s+1)) { /* gcisdigit: isdigit() on a plain char is undefined for negative values */
 31.1853 +                            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1854 +                            if (!pswit[OVERVIEW_SWITCH])
 31.1855 +                                printf("    Line %ld column %d - Wrongspaced singlequotes?\n", linecnt, (int)(s - aline)+1);
 31.1856 +                            else
 31.1857 +                                cnt_punct++;
 31.1858 +                            }
 31.1859 +                        }
 31.1860 +                    }
 31.1861 +                }
 31.1862 +                    
 31.1863 +
 31.1864 +        /* v.20 also look for double punctuation like ,. or ,,     */
 31.1865 +        /* Thanks to DW for the suggestion!                        */
 31.1866 +        /* I'm putting this in a separate loop for clarity         */
 31.1867 +        /* In books with references, ".," and ".;" are common      */
 31.1868 +        /* e.g. "etc., etc.," and vol. 1.; vol 3.;                 */
 31.1869 +        /* OTOH, from my initial tests, there are also fairly      */
 31.1870 +        /* common errors. What to do? Make these cases paranoid?   */
 31.1871 +        /* V.21 ".," is the most common, so invented warn_dotcomma */
 31.1872 +        /* to suppress detailed reporting if it occurs often       */
 31.1873 +        llen = strlen(aline);
 31.1874 +        for (i = 0; i < llen; i++)                  /* for each character in the line */
 31.1875 +            if (strchr(".?!,;:", aline[i])          /* if it's punctuation */
 31.1876 +            && (strchr(".?!,;:", aline[i+1]))
 31.1877 +            && aline[i] && aline[i+1])      /* followed by punctuation, it's a query, unless . . . */
 31.1878 +                if (
 31.1879 +                  (aline[i] == aline[i+1]
 31.1880 +                  && (aline[i] == '.' || aline[i] == '?' || aline[i] == '!'))
 31.1881 +                  || (!warn_dotcomma && aline[i] == '.' && aline[i+1] == ',')
 31.1882 +                  || (isFrench && !strncmp(aline+i, ",...", 4))
 31.1883 +                  || (isFrench && !strncmp(aline+i, "...,", 4))
 31.1884 +                  || (isFrench && !strncmp(aline+i, ";...", 4))
 31.1885 +                  || (isFrench && !strncmp(aline+i, "...;", 4))
 31.1886 +                  || (isFrench && !strncmp(aline+i, ":...", 4))
 31.1887 +                  || (isFrench && !strncmp(aline+i, "...:", 4))
 31.1888 +                  || (isFrench && !strncmp(aline+i, "!...", 4))
 31.1889 +                  || (isFrench && !strncmp(aline+i, "...!", 4))
 31.1890 +                  || (isFrench && !strncmp(aline+i, "?...", 4))
 31.1891 +                  || (isFrench && !strncmp(aline+i, "...?", 4))
 31.1892 +                ) {
 31.1893 +                if ((isFrench && !strncmp(aline+i, ",...", 4))    /* could this BE any more awkward? */
 31.1894 +                  || (isFrench && !strncmp(aline+i, "...,", 4))
 31.1895 +                  || (isFrench && !strncmp(aline+i, ";...", 4))
 31.1896 +                  || (isFrench && !strncmp(aline+i, "...;", 4))
 31.1897 +                  || (isFrench && !strncmp(aline+i, ":...", 4))
 31.1898 +                  || (isFrench && !strncmp(aline+i, "...:", 4))
 31.1899 +                  || (isFrench && !strncmp(aline+i, "!...", 4))
 31.1900 +                  || (isFrench && !strncmp(aline+i, "...!", 4))
 31.1901 +                  || (isFrench && !strncmp(aline+i, "?...", 4))
 31.1902 +                  || (isFrench && !strncmp(aline+i, "...?", 4)))
 31.1903 +                    i +=4;
 31.1904 +                        ; /* do nothing for .. !! and ?? which can be legit */
 31.1905 +                    }
 31.1906 +                else {
 31.1907 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1908 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1909 +                        printf("    Line %ld column %d - Double punctuation?\n", linecnt, i+1);
 31.1910 +                    else
 31.1911 +                        cnt_punct++;
 31.1912 +                    }
 31.1913 +
 31.1914 +        /* v.21 breaking out the search for spaced doublequotes */
 31.1915 +        /* This is not as efficient, but it's more maintainable */
 31.1916 +        s = aline;
 31.1917 +        while (strstr(s," \" ")) {
 31.1918 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1919 +            if (!pswit[OVERVIEW_SWITCH])
 31.1920 +                printf("    Line %ld column %d - Spaced doublequote?\n", linecnt, (int)(strstr(s," \" ")-aline+1));
 31.1921 +            else
 31.1922 +                cnt_punct++;
 31.1923 +            s = strstr(s," \" ") + 2;
 31.1924 +            }
 31.1925 +
 31.1926 +        /* v.20 also look for spaced singlequotes ' and `  */
 31.1927 +        s = aline;
 31.1928 +        while (strstr(s," ' ")) {
 31.1929 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1930 +            if (!pswit[OVERVIEW_SWITCH])
 31.1931 +                printf("    Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ' ")-aline+1));
 31.1932 +            else
 31.1933 +                cnt_punct++;
 31.1934 +            s = strstr(s," ' ") + 2;
 31.1935 +            }
 31.1936 +
 31.1937 +        s = aline;
 31.1938 +        while (strstr(s," ` ")) {
 31.1939 +            if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1940 +            if (!pswit[OVERVIEW_SWITCH])
 31.1941 +                printf("    Line %ld column %d - Spaced singlequote?\n", linecnt, (int)(strstr(s," ` ")-aline+1));
 31.1942 +            else
 31.1943 +                cnt_punct++;
 31.1944 +            s = strstr(s," ` ") + 2;
 31.1945 +            }
 31.1946 +
 31.1947 +        /* v.99 check special case of 'S instead of 's at end of word */
 31.1948 +        s = aline + 1;
 31.1949 +        while (*s) {
 31.1950 +            if (*s == CHAR_SQUOTE && *(s+1) == 'S' && *(s-1)>='a' && *(s-1)<='z')  {
 31.1951 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1952 +                if (!pswit[OVERVIEW_SWITCH])
 31.1953 +                    printf("    Line %ld column %d - Capital \"S\"?\n", linecnt, (int)(s-aline+2));
 31.1954 +                else
 31.1955 +                    cnt_punct++;
 31.1956 +                }
 31.1957 +            s++;
 31.1958 +            }
 31.1959 +
 31.1960 +
 31.1961 +        /* v.21 Now check special cases - start and end of line - */
 31.1962 +        /* for single and double quotes. Start is sometimes [sic] */
 31.1963 +        /* but better to query it anyway.                         */
 31.1964 +        /* While I'm here, check for dash at end of line          */
 31.1965 +        llen = strlen(aline);
 31.1966 +        if (llen > 1) {
 31.1967 +            if (aline[llen-1] == CHAR_DQUOTE ||
 31.1968 +                aline[llen-1] == CHAR_SQUOTE ||
 31.1969 +                aline[llen-1] == CHAR_OPEN_SQUOTE)
 31.1970 +                if (aline[llen-2] == CHAR_SPACE) {
 31.1971 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1972 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1973 +                        printf("    Line %ld column %d - Spaced quote?\n", linecnt, llen);
 31.1974 +                    else
 31.1975 +                        cnt_punct++;
 31.1976 +                    }
 31.1977 +            
 31.1978 +            /* V 0.98 removed aline[0] == CHAR_DQUOTE from the test below, since */
 31.1979 +            /* Wrongspaced quotes test also catches it for "                     */
 31.1980 +            if (aline[0] == CHAR_SQUOTE ||
 31.1981 +                aline[0] == CHAR_OPEN_SQUOTE)
 31.1982 +                if (aline[1] == CHAR_SPACE) {
 31.1983 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1984 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1985 +                        printf("    Line %ld column 1 - Spaced quote?\n", linecnt);
 31.1986 +                    else
 31.1987 +                        cnt_punct++;
 31.1988 +                    }
 31.1989 +            /* dash at end of line may well be legit - paranoid mode only */
 31.1990 +            /* and don't report em-dash at line-end                       */
 31.1991 +            if (pswit[PARANOID_SWITCH] && warn_hyphen) {
 31.1992 +                for (i = llen-1; i > 0 && (unsigned char)aline[i] <= CHAR_SPACE; i--);
 31.1993 +                if (aline[i] == '-' && aline[i-1] != '-') {
 31.1994 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.1995 +                    if (!pswit[OVERVIEW_SWITCH])
 31.1996 +                        printf("    Line %ld column %d - Hyphen at end of line?\n", linecnt, i);
 31.1997 +                    }
 31.1998 +                }
 31.1999 +            }
 31.2000 +
 31.2001 +        /* v.21 also look for brackets surrounded by alpha                    */
 31.2002 +        /* Brackets are often unspaced, but shouldn't be surrounded by alpha. */
 31.2003 +        /* If so, suspect a scanno like "a]most"                              */
 31.2004 +        llen = strlen(aline);
 31.2005 +        for (i = 1; i < llen-1; i++) {           /* for each character in the line except 1st & last*/
 31.2006 +            if (strchr("{[()]}", aline[i])         /* if it's a bracket */
 31.2007 +                && gcisalpha(aline[i-1]) && gcisalpha(aline[i+1])) {
 31.2008 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.2009 +                if (!pswit[OVERVIEW_SWITCH])
 31.2010 +                    printf("    Line %ld column %d - Unspaced bracket?\n", linecnt, i);
 31.2011 +                else
 31.2012 +                    cnt_punct++;
 31.2013 +                }
 31.2014 +            }
 31.2015 +        /* The "Cinderella" case, back in again! :-S Give it another shot */
 31.2016 +        if (warn_endquote) {
 31.2017 +            llen = strlen(aline);
 31.2018 +            for (i = 1; i < llen; i++) {           /* for each character in the line except 1st */
 31.2019 +                if (aline[i] == CHAR_DQUOTE)
 31.2020 +                    if (isalpha(aline[i-1])) {
 31.2021 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.2022 +                        if (!pswit[OVERVIEW_SWITCH])
 31.2023 +                            printf("    Line %ld column %d - endquote missing punctuation?\n", linecnt, i);
 31.2024 +                        else
 31.2025 +                            cnt_punct++;
 31.2026 +                        }
 31.2027 +                }
 31.2028 +            }
 31.2029 +
 31.2030 +        llen = strlen(aline);
 31.2031 +
 31.2032 +        /* Check for <HTML TAG> */
 31.2033 +        /* If there is a < in the line, followed at some point  */
 31.2034 +        /* by a > then we suspect HTML                          */
 31.2035 +        if (strstr(aline, "<") && strstr(aline, ">")) {
 31.2036 +            i = (signed int) (strstr(aline, ">") - strstr(aline, "<") + 1);
 31.2037 +            if (i > 0) {
 31.2038 +                strncpy(wrk, strstr(aline, "<"), i);
 31.2039 +                wrk[i] = 0;
 31.2040 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.2041 +                if (!pswit[OVERVIEW_SWITCH])
 31.2042 +                    printf("    Line %ld column %d - HTML Tag? %s \n", linecnt, (int)(strstr(aline, "<") - aline) + 1, wrk);
 31.2043 +                else
 31.2044 +                    cnt_html++;
 31.2045 +                }
 31.2046 +            }
 31.2047 +
 31.2048 +        /* Check for &symbol; HTML                   */
 31.2049 +        /* If there is a & in the line, followed at  */
 31.2050 +        /* some point by a ; then we suspect HTML    */
 31.2051 +        if (strstr(aline, "&") && strstr(aline, ";")) {
 31.2052 +            i = (int)(strstr(aline, ";") - strstr(aline, "&") + 1);
 31.2053 +            for (s = strstr(aline, "&"); s < strstr(aline, ";"); s++)   
 31.2054 +                if (*s == CHAR_SPACE) i = 0;                /* 0.99 don't report "Jones & Son;" */
 31.2055 +            if (i > 0) {
 31.2056 +                strncpy(wrk, strstr(aline,"&"), i);
 31.2057 +                wrk[i] = 0;
 31.2058 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", aline);
 31.2059 +                if (!pswit[OVERVIEW_SWITCH])
 31.2060 +                    printf("    Line %ld column %d - HTML symbol? %s \n", linecnt, (int)(strstr(aline, "&") - aline) + 1, wrk);
 31.2061 +                else
 31.2062 +                    cnt_html++;
 31.2063 +                }
 31.2064 +            }
 31.2065 +
 31.2066 +        /* At end of paragraph, check for mismatched quotes.           */
 31.2067 +        /* We don't want to report an error immediately, since it is a */
 31.2068 +        /* common convention to omit the quotes at end of paragraph if */
 31.2069 +        /* the next paragraph is a continuation of the same speaker.   */
 31.2070 +        /* Where this is the case, the next para should begin with a   */
 31.2071 +        /* quote, so we store the warning message and only display it  */
 31.2072 +        /* at the top of the next iteration if the new para doesn't    */
 31.2073 +        /* start with a quote.                                         */
 31.2074 +        /* The -p switch overrides this default, and warns of unclosed */
 31.2075 +        /* quotes on _every_ paragraph, whether the next begins with a */
 31.2076 +        /* quote or not.                                               */
 31.2077 +        /* Version .16 - only report mismatched single quotes if       */
 31.2078 +        /* an open_single_quotes was found.                            */
 31.2079 +
 31.2080 +        if (isemptyline) {          /* end of para - add up the totals */
 31.2081 +            if (quot % 2)
 31.2082 +                sprintf(dquote_err, "    Line %ld - Mismatched quotes\n", linecnt);
 31.2083 +            if (pswit[SQUOTE_SWITCH] && open_single_quote && (open_single_quote != close_single_quote) )
 31.2084 +                sprintf(squote_err,"    Line %ld - Mismatched singlequotes?\n", linecnt);
 31.2085 +            if (pswit[SQUOTE_SWITCH] && open_single_quote
 31.2086 +                                     && (open_single_quote != close_single_quote)
 31.2087 +                                     && (open_single_quote != close_single_quote +1) )
 31.2088 +                squot = 1;    /* flag it to be noted regardless of the first char of the next para */
 31.2089 +            if (r_brack)
 31.2090 +                sprintf(rbrack_err, "    Line %ld - Mismatched round brackets?\n", linecnt);
 31.2091 +            if (s_brack)
 31.2092 +                sprintf(sbrack_err, "    Line %ld - Mismatched square brackets?\n", linecnt);
 31.2093 +            if (c_brack)
 31.2094 +                sprintf(cbrack_err, "    Line %ld - Mismatched curly brackets?\n", linecnt);
 31.2095 +            if (c_unders % 2)
 31.2096 +                sprintf(unders_err, "    Line %ld - Mismatched underscores?\n", linecnt);
 31.2097 +            quot = s_brack = c_brack = r_brack = c_unders =
 31.2098 +                open_single_quote = close_single_quote = 0;
 31.2099 +            isnewpara = 1;     /* let the next iteration know that it's starting a new para */
 31.2100 +            }
 31.2101 +
 31.2102 +        /* V.21 _ALSO_ at end of paragraph, check for omitted punctuation. */
 31.2103 +        /*      by working back through prevline. DW.                      */
 31.2104 +        /* Hmmm. Need to check this only for "normal" paras.               */
 31.2105 +        /* So what is a "normal" para? ouch!                               */
 31.2106 +        /* Not normal if one-liner (chapter headings, etc.)                */
 31.2107 +        /* Not normal if doesn't contain at least one locase letter        */
 31.2108 +        /* Not normal if starts with space                                 */
 31.2109 +
 31.2110 +        /* 0.99 tighten up on para end checks. Disallow comma and */
 31.2111 +        /* semi-colon. Check for legit para end before quotes.    */
 31.2112 +        if (isemptyline) {          /* end of para */
 31.2113 +            for (s = prevline, i = 0; *s && !i; s++)
 31.2114 +                if (gcisletter(*s))
 31.2115 +                    i = 1;    /* use i to indicate the presence of a letter on the line */
 31.2116 +            /* This next "if" is a problem.                                             */
 31.2117 +            /* If I say "start_para_line <= linecnt - 1", that includes one-line        */
 31.2118 +            /* "paragraphs" like chapter heads. Lotsa false positives.                  */
 31.2119 +            /* If I say "start_para_line < linecnt - 1" it doesn't, but then it         */
 31.2120 +            /* misses genuine one-line paragraphs.                                      */
 31.2121 +            /* So what do I do? */
 31.2122 +            if (i
 31.2123 +                && lastblen > 2
 31.2124 +                && start_para_line < linecnt - 1
 31.2125 +                && *prevline > CHAR_SPACE
 31.2126 +                ) {
 31.2127 +                for (i = strlen(prevline)-1; (prevline[i] == CHAR_DQUOTE || prevline[i] == CHAR_SQUOTE) && prevline[i] > CHAR_SPACE && i > 0; i--);
 31.2128 +                for (  ; i > 0; i--) {
 31.2129 +                    if (gcisalpha(prevline[i])) {
 31.2130 +                        if (pswit[ECHO_SWITCH]) printf("\n%s\n", prevline);
 31.2131 +                        if (!pswit[OVERVIEW_SWITCH])
 31.2132 +                            printf("    Line %ld column %d - No punctuation at para end?\n", linecnt-1, strlen(prevline));
 31.2133 +                        else
 31.2134 +                            cnt_punct++;
 31.2135 +                        break;
 31.2136 +                        }
 31.2137 +                    if (strchr("-.:!([{?}])", prevline[i]))
 31.2138 +                        break;
 31.2139 +                    }
 31.2140 +                }
 31.2141 +            }
 31.2142 +        strcpy(prevline, aline);
 31.2143 +    }
 31.2144 +    fclose (infile);
 31.2145 +    if (!pswit[OVERVIEW_SWITCH])
 31.2146 +        for (i = 0; i < MAX_QWORD; i++)
 31.2147 +            if (dupcnt[i])
 31.2148 +                printf("\nNote: Queried word %s was duplicated %d time%s\n", qword[i], dupcnt[i], "s");
 31.2149 +}
 31.2150 +
 31.2151 +
 31.2152 +
 31.2153 +/* flgets - get one line from the input stream, checking for   */
 31.2154 +/* the existence of exactly one CR/LF line-end per line.       */
 31.2155 +/* Returns a pointer to the line.                              */
 31.2156 +
 31.2157 +char *flgets(char *theline, int maxlen, FILE *thefile, long lcnt)
 31.2158 +{
 31.2159 +    char c;
 31.2160 +    int len, isCR, cint;
 31.2161 +
 31.2162 +    *theline = 0;
 31.2163 +    len = isCR = 0;
 31.2164 +    c = cint = fgetc(thefile);
 31.2165 +    do {
 31.2166 +        if (cint == EOF)
 31.2167 +            return (NULL);
 31.2168 +        if (c == 10)  /* either way, it's end of line */
 31.2169 +            if (isCR)
 31.2170 +                break;
 31.2171 +            else {   /* Error - a LF without a preceding CR */
 31.2172 +                if (pswit[LINE_END_SWITCH]) {
 31.2173 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
 31.2174 +                    if (!pswit[OVERVIEW_SWITCH])
 31.2175 +                        printf("    Line %ld - No CR?\n", lcnt);
 31.2176 +                    else
 31.2177 +                        cnt_lineend++;
 31.2178 +                    }
 31.2179 +                break;
 31.2180 +                }
 31.2181 +        if (c == 13) {
 31.2182 +            if (isCR) { /* Error - two successive CRs */
 31.2183 +                if (pswit[LINE_END_SWITCH]) {
 31.2184 +                    if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
 31.2185 +                    if (!pswit[OVERVIEW_SWITCH])
 31.2186 +                        printf("    Line %ld - Two successive CRs?\n", lcnt);
 31.2187 +                    else
 31.2188 +                        cnt_lineend++;
 31.2189 +                    }
 31.2190 +                }
 31.2191 +            isCR = 1;
 31.2192 +            }
 31.2193 +        else {
 31.2194 +            if (pswit[LINE_END_SWITCH] && isCR) {
 31.2195 +                if (pswit[ECHO_SWITCH]) printf("\n%s\n", theline);
 31.2196 +                if (!pswit[OVERVIEW_SWITCH])
 31.2197 +                    printf("    Line %ld column %d - CR without LF?\n", lcnt, len+1);
 31.2198 +                else
 31.2199 +                    cnt_lineend++;
 31.2200 +                }
 31.2201 +             theline[len] = c;
 31.2202 +             len++;
 31.2203 +             theline[len] = 0;
 31.2204 +             isCR = 0;
 31.2205 +             }
 31.2206 +        c = cint = fgetc(thefile);
 31.2207 +    } while(len < maxlen);
 31.2208 +    if (pswit[MARKUP_SWITCH])  
 31.2209 +        postprocess_for_HTML(theline);
 31.2210 +    if (pswit[DP_SWITCH])  
 31.2211 +        postprocess_for_DP(theline);
 31.2212 +    return(theline);
 31.2213 +}
 31.2214 +
 31.2215 +
 31.2216 +
 31.2217 +
 /* mixdigit - takes a "word" as a parameter, and checks whether it   */
 /* contains a mixture of alpha and digits. Generally, this is an     */
 /* error, but a few shapes are accepted:                             */
 /*   - leading digits + st/rd/nd/th, optionally followed by "s" or   */
 /*     "ly", in either case (21st, 4ths, 2ndly)                      */
 /*   - leading digits + a single l, L, s or d (12l. 3s. 11d.)        */
 /*   - a capital L followed by a word that is not itself a mixed     */
 /*     word (L500)                                                   */
 /* Returns 0 if no error found, 1 if error.                          */

 int mixdigit(char *checkword)   /* check for digits like 1 or 0 in words */
 {
     int wehaveadigit, wehavealetter, firstdigits, query, wl;
     char *s;

     wehaveadigit = wehavealetter = 0;
     for (s = checkword; *s; s++) {
         if (gcisalpha(*s))
             wehavealetter = 1;
         else if (gcisdigit(*s))
             wehaveadigit = 1;
     }
     if (!wehaveadigit || !wehavealetter)
         return (0);

     /* Mixed: assume an error, then exclude the common legit cases. */
     query = 1;
     wl = (int)strlen(checkword);
     for (firstdigits = 0; gcisdigit(checkword[firstdigits]); firstdigits++)
         ;

     /* digits, ending in st, rd, nd, th of either case */
     if (firstdigits + 2 == wl &&
           (matchword(checkword + wl - 2, "st")
         || matchword(checkword + wl - 2, "rd")
         || matchword(checkword + wl - 2, "nd")
         || matchword(checkword + wl - 2, "th")))
         query = 0;

     /* the same, pluralised */
     if (firstdigits + 3 == wl &&
           (matchword(checkword + wl - 3, "sts")
         || matchword(checkword + wl - 3, "rds")
         || matchword(checkword + wl - 3, "nds")
         || matchword(checkword + wl - 3, "ths")))
         query = 0;

     /* the same, as adverbs. The suffix is four characters long, so   */
     /* the digits must end four from the end of the word; testing for */
     /* three never matched and could point before the word's start.   */
     if (firstdigits + 4 == wl &&
           (matchword(checkword + wl - 4, "stly")
         || matchword(checkword + wl - 4, "rdly")
         || matchword(checkword + wl - 4, "ndly")
         || matchword(checkword + wl - 4, "thly")))
         query = 0;

     /* digits, ending in l, L, s or d */
     if (firstdigits + 1 == wl &&
         (checkword[wl-1] == 'l'
         || checkword[wl-1] == 'L'
         || checkword[wl-1] == 's'
         || checkword[wl-1] == 'd'))
         query = 0;

     /* L at the start of a number, representing British pounds, like L500 */
     /* We know the current word is mixed. If the first letter is L, there */
     /* must be at least one digit following. If both digits and letters   */
     /* follow, we have a genuine error, else we have a capital L followed */
     /* by digits, and we accept that as a non-error.                      */
     if (checkword[0] == 'L' && !mixdigit(checkword + 1))
         query = 0;

     return (query);
 }
 31.2285 +
 31.2286 +
 31.2287 +
 31.2288 +
 31.2289 +/* getaword - extracts the first/next "word" from the line, and puts */
 31.2290 +/* it into "thisword". A word is defined as one English word unit    */
 31.2291 +/* -- or at least that's what I'm trying for.                        */
 31.2292 +/* Returns a pointer to the position in the line where we will start */
 31.2293 +/* looking for the next word.                                        */
 31.2294 +
 31.2295 +char *getaword(char *fromline, char *thisword)
 31.2296 +{
 31.2297 +    int i, wordlen;
 31.2298 +    char *s;
 31.2299 +
 31.2300 +    wordlen = 0;
 31.2301 +    for ( ; !gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline ; fromline++ );
 31.2302 +
 31.2303 +    /* V .20                                                                   */
 31.2304 +    /* add a look-ahead to handle exceptions for numbers like 1,000 and 1.35.  */
 31.2305 +    /* Especially yucky is the case of L1,000                                  */
 31.2306 +    /* I hate this, and I see other ways, but I don't see that any is _better_.*/
 31.2307 +    /* This section looks for a pattern of characters including a digit        */
 31.2308 +    /* followed by a comma or period followed by one or more digits.           */
 31.2309 +    /* If found, it returns this whole pattern as a word; otherwise we discard */
 31.2310 +    /* the results and resume our normal programming.                          */
 31.2311 +    s = fromline;
 31.2312 +    for (  ; (gcisdigit(*s) || gcisalpha(*s) || *s == ',' || *s == '.') && wordlen < MAXWORDLEN ; s++ ) {
 31.2313 +        thisword[wordlen] = *s;
 31.2314 +        wordlen++;
 31.2315 +        }
 31.2316 +    thisword[wordlen] = 0;
 31.2317 +    for (i = 1; i < wordlen -1; i++) {
 31.2318 +        if (thisword[i] == '.' || thisword[i] == ',') {
 31.2319 +            if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1])) {   /* we have one of the damned things */
 31.2320 +                fromline = s;
 31.2321 +                return(fromline);
 31.2322 +                }
 31.2323 +            }
 31.2324 +        }
 31.2325 +
 31.2326 +    /* we didn't find a punctuated number - do the regular getword thing */
 31.2327 +    wordlen = 0;
 31.2328 +    for (  ; (gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline == '\'') && wordlen < MAXWORDLEN ; fromline++ ) {
 31.2329 +        thisword[wordlen] = *fromline;
 31.2330 +        wordlen++;
 31.2331 +        }
 31.2332 +    thisword[wordlen] = 0;
 31.2333 +    return(fromline);
 31.2334 +}
 31.2335 +
 31.2336 +
 31.2337 +
 31.2338 +
 31.2339 +
 /* matchword - case-insensitive comparison of two whole strings.   */
 /* Returns 1 if checkfor and thisword have the same length and the */
 /* same characters once both are upper-cased with toupper(), else  */
 /* 0. Note the sense is the opposite of strcmp().                  */

 int matchword(char *checkfor, char *thisword)
 {
     size_t i, len;

     len = strlen(checkfor);
     if (len != strlen(thisword)) return (0);

     for (i = 0; i < len; i++) {
         /* toupper() is only defined for unsigned char values (and EOF), */
         /* so 8-bit characters must not be passed as negative chars.     */
         if (toupper((unsigned char)checkfor[i]) != toupper((unsigned char)thisword[i]))
             return (0);
     }
     return (1);
 }
 31.2356 +
 31.2357 +
 31.2358 +
 31.2359 +
 31.2360 +
 /* lowerit - lowercase the line in place. Only the 26 letters A-Z  */
 /* are touched; every other character, including hi-bit ones, is   */
 /* left exactly as it was. (strlwr is not available everywhere,    */
 /* and tolower is not trusted with hi-bit characters.)             */

 void lowerit(char *theline)
 {
     char *p = theline;

     while (*p != 0) {
         if ('A' <= *p && *p <= 'Z')
             *p = (char)(*p + 32);    /* 32 is the distance from 'A' to 'a' */
         p++;
     }
 }
 31.2372 +
 31.2373 +
 /* isroman - does this word LOOK like a (lowercase) Roman numeral?  */
 /* It does not validate the number -- for example it will pass      */
 /* mxxxxxxxxxx -- it only checks the order of the symbols. Working  */
 /* left to right it allows: any number of m, an optional d, an      */
 /* optional cm, an optional cd, any number of c, an optional xl, an */
 /* optional xc, an optional l, any number of x, an optional ix, an  */
 /* optional iv, an optional v and any number of i.                  */
 /* Returns 1 if the whole word is consumed that way, 0 otherwise    */
 /* (including for a NULL or empty word).                            */

 int isroman(char *t)
 {
     if (!t || !*t) return (0);

     while (*t == 'm') t++;
     if (*t == 'd') t++;
     if (t[0] == 'c' && t[1] == 'm') t += 2;
     if (t[0] == 'c' && t[1] == 'd') t += 2;
     while (*t == 'c') t++;
     if (t[0] == 'x' && t[1] == 'l') t += 2;
     if (t[0] == 'x' && t[1] == 'c') t += 2;
     if (*t == 'l') t++;
     while (*t == 'x') t++;
     if (t[0] == 'i' && t[1] == 'x') t += 2;
     if (t[0] == 'i' && t[1] == 'v') t += 2;
     if (*t == 'v') t++;
     while (*t == 'i') t++;

     /* anything left over means it wasn't a numeral */
     return (*t == 0);
 }
 31.2411 +
 31.2412 +
 31.2413 +
 31.2414 +
 /* gcisalpha is a special version of isalpha() that is somewhat lenient on    */
 /* 8-bit texts. With the standard isalpha(), 8-bit accented characters break  */
 /* words, so that tete with accented characters appears to be two words, "t"  */
 /* and "t", which causes over-reporting of errors. gcisalpha() accepts A-Z,   */
 /* a-z, the codes 140, 142, 156, 158 and 159, and every code from 192 up      */
 /* except 208, 215, 222, 240, 247 and 254 -- i.e. the accented letters of the */
 /* CP1252 (Windows) and ISO-8859-1 character sets.                            */
 /* Returns 1 for a letter, 0 otherwise.                                       */

 int gcisalpha(unsigned char c)
 {
     if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
         return (1);

     switch (c) {
     case 140: case 142: case 156: case 158: case 159:
         return (1);                 /* the letters below 192 */
     case 208: case 215: case 222: case 240: case 247: case 254:
         return (0);                 /* the exceptions from 192 up */
     default:
         return (c >= 192);
     }
 }
 31.2431 +
 /* gcisdigit is a special version of isdigit() that doesn't get confused in   */
 /* 8-bit texts: only the ten characters '0' to '9' count. Returns 1 or 0.     */
 int gcisdigit(unsigned char c)
 {
     return ('0' <= c && c <= '9') ? 1 : 0;
 }
 31.2438 +
 /* gcisletter is a special version that doesn't get confused in 8-bit texts.   */
 /* It accepts A-Z, a-z and every code from 192 upwards, without exception.     */
 /* Yeah, we're ISO-8859-1-specific. So sue me.                                 */
 /* Returns 1 for a letter, 0 otherwise.                                        */
 int gcisletter(unsigned char c)
 {
     if (c >= 192)
         return (1);
     if (c >= 'a' && c <= 'z')
         return (1);
     if (c >= 'A' && c <= 'Z')
         return (1);
     return (0);
 }
 31.2446 +
 31.2447 +
 31.2448 +
 31.2449 +
 /* gcstrchr wraps strchr so that searching for the zero character fails:   */
 /* plain strchr would find the string's terminator, gcstrchr returns NULL. */
 /* For any other character the result is exactly that of strchr(s, c).     */

 char *gcstrchr(char *s, char c)
 {
     return (c != 0) ? strchr(s, c) : NULL;
 }
 31.2457 +
 31.2458 +/* postprocess_for_DP is derived from postprocess_for_HTML          */
 31.2459 +/* It is invoked with the -d switch from flgets().                  */
 31.2460 +/* It simply "removes" from the line a hard-coded set of common     */
 31.2461 +/* DP-specific tags, so that the line passed to the main routine has*/
 31.2462 +/* been pre-cleaned of DP markup.                                   */
 31.2463 +
 31.2464 +void postprocess_for_DP(char *theline)
 31.2465 +{
 31.2466 +
 31.2467 +    char *s, *t;
 31.2468 +    int i;
 31.2469 +
 31.2470 +    if (!*theline) 
 31.2471 +        return;
 31.2472 +
 31.2473 +    for (i = 0; *DPmarkup[i]; i++) {
 31.2474 +        s = strstr(theline, DPmarkup[i]);
 31.2475 +        while (s) {
 31.2476 +            t = s + strlen(DPmarkup[i]);
 31.2477 +            while (*t) {
 31.2478 +                *s = *t;
 31.2479 +                t++; s++;
 31.2480 +                }
 31.2481 +            *s = 0;
 31.2482 +            s = strstr(theline, DPmarkup[i]);
 31.2483 +            }
 31.2484 +        }
 31.2485 +
 31.2486 +}
 31.2487 +
 31.2488 +
 31.2489 +/* postprocess_for_HTML is, at the moment (0.97), a very nasty      */
 31.2490 +/* short-term fix for Charlz. Nasty, nasty, nasty.                  */
 31.2491 +/* It is invoked with the -m switch from flgets().                  */
 31.2492 +/* It simply "removes" from the line a hard-coded set of common     */
 31.2493 +/* HTML tags and "replaces" a hard-coded set of common HTML         */
 31.2494 +/* entities, so that the line passed to the main routine has        */
 31.2495 +/* been pre-cleaned of HTML. This is _so_ not the right way to      */
 31.2496 +/* deal with HTML, but what Charlz needs now is not HTML handling   */
 31.2497 +/* proper: just ignoring <i> tags and some others.                  */
 31.2498 +/* To be revisited in future releases!                              */
 31.2499 +
 31.2500 +void postprocess_for_HTML(char *theline)
 31.2501 +{
 31.2502 +
 31.2503 +    if (strstr(theline, "<") && strstr(theline, ">"))
 31.2504 +        while (losemarkup(theline))
 31.2505 +            ;
 31.2506 +    while (loseentities(theline))
 31.2507 +        ;
 31.2508 +}
 31.2509 +
 31.2510 +char *losemarkup(char *theline)
 31.2511 +{
 31.2512 +    char *s, *t;
 31.2513 +    int i;
 31.2514 +
 31.2515 +    if (!*theline) 
 31.2516 +        return(NULL);
 31.2517 +
 31.2518 +    s = strstr(theline, "<");
 31.2519 +    t = strstr(theline, ">");
 31.2520 +    if (!s || !t) return(NULL);
 31.2521 +    for (i = 0; *markup[i]; i++)
 31.2522 +        if (!tagcomp(s+1, markup[i])) {
 31.2523 +            if (!*(t+1)) {
 31.2524 +                *s = 0;
 31.2525 +                return(s);
 31.2526 +                }
 31.2527 +            else
 31.2528 +                if (t > s) {
 31.2529 +                    strcpy(s, t+1);
 31.2530 +                    return(s);
 31.2531 +                    }
 31.2532 +        }
 31.2533 +    /* it's an unrecognized <xxx> */
 31.2534 +    return(NULL);
 31.2535 +}
 31.2536 +
 31.2537 +char *loseentities(char *theline)
 31.2538 +{
 31.2539 +    int i;
 31.2540 +    char *s, *t;
 31.2541 +
 31.2542 +    if (!*theline) 
 31.2543 +        return(NULL);
 31.2544 +
 31.2545 +    for (i = 0; *entities[i].htmlent; i++) {
 31.2546 +        s = strstr(theline, entities[i].htmlent);
 31.2547 +        if (s) {
 31.2548 +            t = malloc((size_t)strlen(s));
 31.2549 +            if (!t) return(NULL);
 31.2550 +            strcpy(t, s + strlen(entities[i].htmlent));
 31.2551 +            strcpy(s, entities[i].textent);
 31.2552 +            strcat(s, t);
 31.2553 +            free(t);
 31.2554 +            return(theline);
 31.2555 +            }
 31.2556 +        }
 31.2557 +
 31.2558 +    /* V0.97 Duh. Forgot to check the htmlnum member */
 31.2559 +    for (i = 0; *entities[i].htmlnum; i++) {
 31.2560 +        s = strstr(theline, entities[i].htmlnum);
 31.2561 +        if (s) {
 31.2562 +            t = malloc((size_t)strlen(s));
 31.2563 +            if (!t) return(NULL);
 31.2564 +            strcpy(t, s + strlen(entities[i].htmlnum));
 31.2565 +            strcpy(s, entities[i].textent);
 31.2566 +            strcat(s, t);
 31.2567 +            free(t);
 31.2568 +            return(theline);
 31.2569 +            }
 31.2570 +        }
 31.2571 +    return(NULL);
 31.2572 +}
 31.2573 +
 31.2574 +
 31.2575 +int tagcomp(char *strin, char *basetag)
 31.2576 +{
 31.2577 +    char *s, *t;
 31.2578 +
 31.2579 +    s = basetag;
 31.2580 +    t  = strin;
 31.2581 +    if (*t == '/') t++; /* ignore a slash */
 31.2582 +    while (*s && *t) {
 31.2583 +        if (tolower(*s) != tolower(*t)) return(1);
 31.2584 +        s++; t++;
 31.2585 +        }
 31.2586 +    /* OK, we have < followed by a valid tag start  */
 31.2587 +    /* should I do something about length?          */
 31.2588 +    /* this is messy. The length of an <i> tag is   */
 31.2589 +    /* limited, but a <table> could go on for miles */
 31.2590 +    /* so I'd have to parse the tags . . . ugh.     */
 31.2591 +    /* It isn't what Charlz needs now, so mark it   */
 31.2592 +    /* as 'pending'.                                */
 31.2593 +    return(0);
 31.2594 +}
 31.2595 +
 31.2596 +void proghelp()                  /* explain program usage here */
 31.2597 +{
 31.2598 +    fputs("V. 0.991. Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
 31.2599 +    fputs("Gutcheck comes wih ABSOLUTELY NO WARRANTY. For details, read the file COPYING.\n", stderr);
 31.2600 +    fputs("This is Free Software; you may redistribute it under certain conditions (GPL);\n", stderr);
 31.2601 +    fputs("read the file COPYING for details.\n\n", stderr);
 31.2602 +    fputs("Usage is: gutcheck [-setpxloyhud] filename\n",stderr);
 31.2603 +    fputs("  where -s checks single quotes, -e suppresses echoing lines, -t checks typos\n",stderr);
 31.2604 +    fputs("  -x (paranoid) switches OFF -t and extra checks, -l turns OFF line-end checks\n",stderr);
 31.2605 +    fputs("  -o just displays overview without detail, -h echoes header fields\n",stderr);
 31.2606 +    fputs("  -v (verbose) unsuppresses duplicate reporting, -m suppresses markup\n",stderr);
 31.2607 +    fputs("  -d ignores DP-specific markup,\n",stderr);
 31.2608 +    fputs("  -u uses a file gutcheck.typ to query user-defined possible typos\n",stderr);
 31.2609 +    fputs("Sample usage: gutcheck warpeace.txt \n",stderr);
 31.2610 +    fputs("\n",stderr);
 31.2611 +    fputs("Gutcheck looks for errors in Project Gutenberg(TM) etexts.\n", stderr);
 31.2612 +    fputs("Gutcheck queries anything it thinks shouldn't be in a PG text; non-ASCII\n",stderr);
 31.2613 +    fputs("characters like accented letters, lines longer than 75 or shorter than 55,\n",stderr);
 31.2614 +    fputs("unbalanced quotes or brackets, a variety of badly formatted punctuation, \n",stderr);
 31.2615 +    fputs("HTML tags, some likely typos. It is NOT a substitute for human judgement.\n",stderr);
 31.2616 +    fputs("\n",stderr);
 31.2617 +}
 31.2618 +
 31.2619 +
 31.2620 +
 31.2621 +/*********************************************************************
 31.2622 +  Revision History:
 31.2623 +
 31.2624 +  04/22/01 Cleaned up some stuff and released .10
 31.2625 +
 31.2626 +           ---------------
 31.2627 +
 31.2628 +  05/09/01 Added the typo list, added two extra cases of he/be error,
 31.2629 +           added -p switch, OPEN_SINGLE QUOTE char as .11
 31.2630 +
 31.2631 +           ---------------
 31.2632 +
 31.2633 +  05/20/01 Increased the typo list,
 31.2634 +           added paranoid mode,
 31.2635 +           ANSIfied the code and added some casts
 31.2636 +              so the compiler wouldn't keep asking if I knew what I was doing,
 31.2637 +           fixed bug in l.s.d. condition (thanks, Dave!),
 31.2638 +           standardized spacing when echoing,
 31.2639 +           added letter-combo checking code to typo section,
 31.2640 +           added more h/b words to typo array.
 31.2641 +           Not too sure about putting letter combos outside of the TYPO conditions -
 31.2642 +           someone is sure to have a book about the tbaka tribe, or something. Anyway, let's see.
 31.2643 +           Released as .12
 31.2644 +
 31.2645 +           ---------------
 31.2646 +
 31.2647 +  06/01/01 Removed duplicate reporting of Tildes, asterisks, etc.
 31.2648 +  06/10/01 Added flgets routine to help with platform-independent
 31.2649 +           detection of invalid line-ends. All PG text files should
 31.2650 +           have CR/LF (13/10) at end of line, regardless of system.
 31.2651 +           Gutcheck now validates this by default. (Thanks, Charles!)
 31.2652 +           Released as .13
 31.2653 +
 31.2654 +           ---------------
 31.2655 +
 31.2656 +  06/11/01 Added parenthesis match checking. (c_brack, cbrack_err etc.)
 31.2657 +           Released as .14
 31.2658 +
 31.2659 +           ---------------
 31.2660 +
 31.2661 +  06/23/01 Fixed: 'No',he said. not being flagged.
 31.2662 +
 31.2663 +           Improved: better single-quotes checking:
 31.2664 +
 31.2665 +           Ignore singlequotes surrounded by alpha, like didn't. (was OK)
 31.2666 +
 31.2667 +           If a singlequote is at the END of a word AND the word ends in "s":
 31.2668 +                  The dogs' tails wagged.
 31.2669 +           it's probably an apostrophe, but less commonly may be a closequote:
 31.2670 +                  "These 'pack dogs' of yours look more like wolves."
 31.2671 +
 31.2672 +           If it's got punctuation before it and is followed by a space
 31.2673 +           or punctuation:
 31.2674 +              . . . was a problem,' he said
 31.2675 +              . . . was a problem,'"
 31.2676 +           it is probably (certainly?) a closequote.
 31.2677 +
 31.2678 +           If it's at start of paragraph, it's probably an openquote.
 31.2679 +              (but watch dialect)
 31.2680 +
 31.2681 +           Words with ' at beginning and end are probably quoted:
 31.2682 +               "You have the word 'chivalry' frequently on your lips."
 31.2683 +               (Not specifically implemented)
 31.2684 +           V.18 I'm glad I didn't implement this, 'cos it jest ain't so
 31.2685 +           where the convention is to punctuate outside the quotes.
 31.2686 +               'Come', he said, 'and join the party'.
 31.2687 +
 31.2688 +           If it is followed by an alpha, and especially a capital:
 31.2689 +              'Hello,' called he.
 31.2690 +           it is either an openquote or dialect.
 31.2691 +
 31.2692 +           Dialect breaks ALL the rules:
 31.2693 +                  A man's a man for a' that.
 31.2694 +                  "Aye, but 'tis all in the pas' now."
 31.2695 +                  "'Tis often the way," he said.
 31.2696 +                  'Ave a drink on me.
 31.2697 +
 31.2698 +           This version looks to be an improvement, and produces
 31.2699 +           fewer false positives, but is still not perfect. The
 31.2700 +           'pack dogs' case still fools it, and dialect is still
 31.2701 +           a problem. Oh, well, it's an improvement, and I have
 31.2702 +           a weighted structure in place for refining guesses at
 31.2703 +           closequotes. Maybe next time, I'll add a bit of logic
 31.2704 +           where if there is an open quote and one that was guessed
 31.2705 +           to be a possessive apostrophe after s, I'll re-guess it
 31.2706 +           to be a closequote. Let's see how this one flies, first.
 31.2707 +
 31.2708 +           (Afterview: it's still crap. Needs much work, and a deeper insight.)
 31.2709 +
 31.2710 +           Released as .15
 31.2711 +
 31.2712 +           TODO: More he/be checks. Can't be perfect - counterexamples:
 31.2713 +              I gave my son good advice: be married regardless of the world's opinion.
 31.2714 +              I gave my son good advice: he married regardless of the world's opinion.
 31.2715 +
 31.2716 +              If by "primitive" be meant "crude", we can understand the sentence.
 31.2717 +              If by "primitive" he meant "crude", we can understand the sentence.
 31.2718 +
 31.2719 +              No matter what be said, I must go on.
 31.2720 +              No matter what he said, I must go on.
 31.2721 +
 31.2722 +              No value, however great, can be set upon them.
 31.2723 +              No value, however great, can he set upon them.
 31.2724 +
 31.2725 +              Real-Life one from a DP International Weekly Miscellany:
 31.2726 +                He wandered through the forest without fear, sleeping
 31.2727 +                much, for in sleep be had companionship--the Great
 31.2728 +                Spirit teaching him what he should know in dreams.
 31.2729 +                That one found by jeebies, and it turned out to be "he".
 31.2730 +
 31.2731 +
 31.2732 +           ---------------
 31.2733 +
 31.2734 +  07/01/01 Added -O option.
 31.2735 +           Improved singlequotes by reporting mismatched single quotes
 31.2736 +           only if an open_single_quotes was found.
 31.2737 +
 31.2738 +           Released as .16
 31.2739 +
 31.2740 +           ---------------
 31.2741 +
 31.2742 +  08/27/01 Added -Y switch for Robert Rowe to allow his app to
 31.2743 +           catch the error output.
 31.2744 +
 31.2745 +           Released as .17
 31.2746 +
 31.2747 +           ---------------
 31.2748 +
 31.2749 +  09/08/01 Added checking Capitals at start of paragraph, but not
 31.2750 +           checking them at start of sentence.
 31.2751 +
 31.2752 +           TODO: Parse sentences out so can check reliably for start of
 31.2753 +                 sentence. Need a whole different approach for that.
 31.2754 +                 (Can't just rely on periods, since they are also
 31.2755 +                 used for abbreviations, etc.)
 31.2756 +
 31.2757 +           Added checking for all vowels or all consonants in a word.
 31.2758 +
 31.2759 +           While I was in, I added "ii" checking and "tl" at start of word.
 31.2760 +
 31.2761 +           Added echoing of first line of paragraph when reporting
 31.2762 +           mismatched quotes or brackets (thanks to David Widger for the
 31.2763 +           suggestion)
 31.2764 +
 31.2765 +           Not querying L at start of a number (used for British pounds).
 31.2766 +
 31.2767 +           The spelling changes are sort of half-done but released anyway
 31.2768 +           Skipped .18 because I had given out a couple of test versions
 31.2769 +           with that number.
 31.2770 +
 31.2771 +  09/25/01 Released as .19
 31.2772 +
 31.2773 +           ---------------
 31.2774 +
 31.2775 +           TODO:
 31.2776 +           Use the logic from my new version of safewrap to stop querying
 31.2777 +             short lines like poems and TOCs.
 31.2778 +           Ignore non-standard ellipses like .  .  . or ...
 31.2779 +
 31.2780 +
 31.2781 +           ---------------
 31.2782 +  10/01/01 Made any line over 80 a VERY long line (was 85).
 31.2783 +           Recognized openquotes on indented paragraphs as continuations
 31.2784 +               of the same speech.
 31.2785 +           Added "cf" to the okword list (how did I forget _that_?) and a few others.
 31.2786 +           Moved abbrev to okword and made it more general.
 31.2787 +           Removed requirement that PG_space_emdash be greater than
 31.2788 +               ten before turning off warnings about spaced dashes.
 31.2789 +           Added period to list of characters that might constitute a separator line.
 31.2790 +           Now checking for double punctuation (Thanks, David!)
 31.2791 +           Now if two spaced em-dashes on a line, reports both. (DW)
 31.2792 +           Bug: Wasn't catching spaced punctuation at line-end since I
 31.2793 +               added flgets in version .13 - fixed.
 31.2794 +           Bug: Wasn't catching spaced singlequotes - fixed
 31.2795 +           Now reads punctuated numbers like 1,000 as a single word.
 31.2796 +               (Used to give "standalone 1" type  queries)
 31.2797 +           Changed paranoid mode - not including s and p options. -ex is now quite usable.
 31.2798 +           Bug: was calling `"For it is perfectly impossible,"    Unspaced Quotes - fixed
 31.2799 +           Bug: Sometimes gave _next_ line number for queried word at end of line - fixed
 31.2800 +
 31.2801 +  10/22/01 Released as .20
 31.2802 +
 31.2803 +           ---------------
 31.2804 +
 31.2805 +           Added count of lines with spaces at end. (cnt_spacend) (Thanks, Brett!)
 31.2806 +           Reduced the number of hi-bit letters needed to stop reporting them
 31.2807 +               from 1/20 to 1/100 or 200 in total.
 31.2808 +           Added PG footer check.
 31.2809 +           Added the -h switch.
 31.2810 +           Fixed platform-specific CHAR_EOL checking for isemptyline - changed to 13 and 10
 31.2811 +           Not reporting ".," when there are many of them, such as a book with many references to "Vol 1., p. 23"
 31.2812 +           Added unspaced brackets check when surrounded by alpha.
 31.2813 +           Removed all typo reporting unless the typo switch is on.
 31.2814 +           Added gcisalpha to ease over-reporting of 8-bit queries.
 31.2815 +           ECHO_SWITCH is now ON by default!
 31.2816 +           PARANOID_SWITCH is now ON by default!
 31.2817 +           Checking for ">From" placed there by e-mail MTA (Thanks Andrew & Greg)
 31.2818 +           Checking for standalone lowercase "l"
 31.2819 +           Checking for standalone lowercase "s"
 31.2820 +           Considering "is be" and "be is" "be was" "was be" as he/be errors
 31.2821 +           Looking at punct at end of para
 31.2822 +
 31.2823 +  01/20/02 Released as .21
 31.2824 +
 31.2825 +           ---------------
 31.2826 +
 31.2827 +           Added VERBOSE_SWITCH to make it list everything. (George Davis)
 31.2828 +
 31.2829 +           ---------------
 31.2830 +
 31.2831 +  02/17/02 Added cint in flgets to try to fix an EOF failure on a compiler I don't have.
 31.2832 +           after which
 31.2833 +           This line caused a coredump on Solaris - fixed.
 31.2834 +                Da sagte die Figur: " Das ist alles gar schoen, und man mag die Puppe
 31.2835 +  03/09/02 Changed header recognition for another header change
 31.2836 +           Called it .24
 31.2837 +  03/29/02 Added qword[][] so I can suppress massive overreporting
 31.2838 +           of queried "words" like "FN", "Wm.", "th'", people's 
 31.2839 +           initials, chemical formulae and suchlike in some texts.
 31.2840 +           Called it .25
 31.2841 +  04/07/02 The qword summary reports at end shouldn't show in OVERVIEW mode. Fixed.
 31.2842 +           Added linecounts in overview mode.
 31.2843 +           Wow! gutcheck gutcheck.exe doesn't report a binary! :-) Need to tighten up. Done.
 31.2844 +           "m" is a not uncommon scanno for "in", but also appears in "a.m." - Can I get round that?
 31.2845 +  07/07/02 Added GPL.
 31.2846 +           Added checking for broken em-dash at line-end (enddash)
 31.2847 +           Released as 0.95
 31.2848 +  08/17/02 Fixed a bug that treated some hi-bit characters as spaces. Thanks, Carlo.
 31.2849 +           Released as 0.96
 31.2850 +  10/10/02 Suppressing some annoying multiple reports by default:
 31.2851 +           Standalone Ones, Asterisks, Square Brackets.
 31.2852 +              Digit 1 occurs often in many scientific texts.
 31.2853 +              Asterisk occurs often in multi-footnoted texts.
 31.2854 +              Mismatch Square Brackets occurs often in multi-para footnotes.
 31.2855 +           Added -m switch for Charlz. Horrible. Nasty. Kludgy. Evil.
 31.2856 +              . . . but it does more or less work for the main cases.
 31.2857 +           Removed uppercase within a word as a separate category so
 31.2858 +           that names like VanAllen get reported only once, like other
 31.2859 +           suspected typos.
 31.2860 +  11/24/02 Fixed - -m switch wasn't looking at htmlnum in
 31.2861 +           loseentities (Thanks, Brett!)
 31.2862 +           Fixed bug which occasionally gave false warning of
 31.2863 +           paragraph starting with lowercase.
 31.2864 +           Added underscore as character not to query around doublequotes.
 31.2865 +           Split the "Non-ASCII" message into "Non-ASCII" vs. "Non-ISO-8859"
 31.2866 +           . . . this is to help detect things like CP1252 characters.
 31.2867 +           Released as 0.97
 31.2868 +
 31.2869 +  12/01/02 Hacked a simplified version of the "Wrongspaced quotes" out of gutspell,
 31.2870 +           for doublequotes only. Replaces "Spaced quote", since it also covers that
 31.2871 +           case.
 31.2872 +           Added "warn_hyphen" to ease over-reporting of hyphens.
 31.2873 +
 31.2874 +  12/20/02 Added "extra period" checks.
 31.2875 +           Added single character line check
 31.2876 +           Added I" check - is usually an exclam
 31.2877 +           Released as 0.98
 31.2878 +
 31.2879 +  1/5/03   Eeek! Left in a lowerit(argv[0]) at the start before procfile()
 31.2880 +           from when I was looking at ways to identify markup. Refuses to
 31.2881 +           open files for *nix users with upcase in the filenames. Removed.
 31.2882 +           Fixed quickly and released as 0.981
 31.2883 +
 31.2884 +  1/8/03   Added "arid" to the list of typos, slightly against my better
 31.2885 +           judgement, but the DP gang are all excited about it. :-)
 31.2886 +           Added a check for comma followed by capital letter, where
 31.2887 +           a period has OCRed into a comma. (DW). Not sure about this
 31.2888 +           either; we'll see.
 31.2889 +           Compiling for Win32 to allow longfilenames.
 31.2890 +
 31.2891 +  6/1/04   A messy test release for DW to include the "gutcheck.typ"
 31.2892 +           process. And the gutcheck.jee trials. Removed "arid" --
 31.2893 +           it can go in gutcheck.typ
 31.2894 +
 31.2895 +           Added checks for carets ^ and slants / but disabling slant
 31.2896 +           queries if more than 20 of them, because some people use them
 31.2897 +           for /italics/. Slants are commonly mistaken italic "I"s.
 31.2898 +
 31.2899 +           Later: removed gutcheck.jee -- wrote jeebies instead.
 31.2900 +
 31.2901 +Random TODO: 
 31.2902 +           Check brackets more closely, like quotes, so that it becomes
 31.2903 +           easy to find the error in long paragraphs full of brackets.
 31.2904 +
 31.2905 +
 31.2906 +  11/4/04  Assorted cleanup. Fixed case where text started with an
 31.2907 +           unbalanced paragraph.
 31.2908 +
 31.2909 +  1/2/05   Has it really been that long? Added "nocomma", "noperiod" check.
 31.2910 +           Bits and pieces: improved isroman(). Added isletter().
 31.2911 +           Other stuff I never noted before this.
 31.2912 +
 31.2913 +  7/3/05   Stuck in a quick start on DP-markup ignoring 
 31.2914 +           at BillFlis's suggestion.
 31.2915 +
 31.2916 +  1/23/06  Took out nocomma etc if typos are off. Why did I ever leave that in?
 31.2917 +           Don't count footer for dotcomma etc.
 31.2918 +
 31.2919 +
 31.2920 +1       I
 31.2921 +ail     all
 31.2922 +arc     are
 31.2923 +arid    and
 31.2924 +bad     had
 31.2925 +ball    hall
 31.2926 +band    hand
 31.2927 +bar     her
 31.2928 +bat     but
 31.2929 +be      he
 31.2930 +bead    head
 31.2931 +beads   heads
 31.2932 +bear    hear
 31.2933 +bit     hit
 31.2934 +bo      be
 31.2935 +boon    been
 31.2936 +borne   home
 31.2937 +bow     how
 31.2938 +bumbled humbled
 31.2939 +car     ear
 31.2940 +carnage carriage
 31.2941 +carne   came
 31.2942 +cast    east
 31.2943 +cat     cut
 31.2944 +cat     eat
 31.2945 +cheek   check
 31.2946 +clay    day
 31.2947 +coining coming
 31.2948 +comer   corner
 31.2949 +die     she
 31.2950 +docs    does
 31.2951 +ease    case
 31.2952 +fail    fall
 31.2953 +fee     he
 31.2954 +haying  having
 31.2955 +ho      he
 31.2956 +ho      who
 31.2957 +hut     but
 31.2958 +is      as
 31.2959 +lie     he
 31.2960 +lime    time
 31.2961 +loth    10th
 31.2962 +m       in
 31.2963 +modem   modern
 31.2964 +Ms      his
 31.2965 +ray     away
 31.2966 +ray     my
 31.2967 +ringer  finger
 31.2968 +ringers fingers
 31.2969 +rioted  noted
 31.2970 +tho     the
 31.2971 +tie     he
 31.2972 +tie     the
 31.2973 +tier    her
 31.2974 +tight   right
 31.2975 +tile    the
 31.2976 +tiling  thing
 31.2977 +tip     up
 31.2978 +tram    train
 31.2979 +tune    time
 31.2980 +u       "
 31.2981 +wen     well
 31.2982 +yon     you
 31.2983 +
 31.2984 +*********************************************************************/
 31.2985 +
    32.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    32.2 +++ b/gutcheck/gutcheck.typ.in	Tue Jan 24 23:54:05 2012 +0000
    32.3 @@ -0,0 +1,54 @@
    32.4 +11
    32.5 +44
    32.6 +ms
    32.7 +ail
    32.8 +alien
    32.9 +arc
   32.10 +arid
   32.11 +bar
   32.12 +bat
   32.13 +bo
   32.14 +borne
   32.15 +bow
   32.16 +bum
   32.17 +bumbled
   32.18 +carnage
   32.19 +carne
   32.20 +cither
   32.21 +coining
   32.22 +comer
   32.23 +cur
   32.24 +docs
   32.25 +eve
   32.26 +eves
   32.27 +gaming
   32.28 +gram
   32.29 +guru
   32.30 +hag
   32.31 +hare
   32.32 +haying
   32.33 +ho
   32.34 +lime
   32.35 +loth
   32.36 +m
   32.37 +modem
   32.38 +nave
   32.39 +ringer
   32.40 +ringers
   32.41 +riot
   32.42 +rioted
   32.43 +signer
   32.44 +snore
   32.45 +spam
   32.46 +tho
   32.47 +tier
   32.48 +tile
   32.49 +tiling
   32.50 +tram
   32.51 +tum
   32.52 +tune
   32.53 +u
   32.54 +vas
   32.55 +wag
   32.56 +wen
   32.57 +yon
    33.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    33.2 +++ b/test/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
    33.3 @@ -0,0 +1,1 @@
    33.4 +SUBDIRS=harness compatibility .
    34.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.2 +++ b/test/compatibility/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
    34.3 @@ -0,0 +1,7 @@
    34.4 +TESTS_ENVIRONMENT=GUTCHECK=../../gutcheck/gutcheck ../harness/gc-test
    34.5 +TESTS=missing-space.tst spaced-punctuation.tst html-tag.tst html-symbol.tst \
    34.6 +	spaced-doublequote.tst mismatched-quotes.tst he-be.tst digits.tst \
    34.7 +	extra-period.tst ellipsis.tst short-line.tst abbreviation.tst \
    34.8 +	example.tst
    34.9 +
   34.10 +dist_pkgdata_DATA=$(TESTS)
    35.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.2 +++ b/test/compatibility/abbreviation.tst	Tue Jan 24 23:54:05 2012 +0000
    35.3 @@ -0,0 +1,9 @@
    35.4 +**************** INPUT ****************
    35.5 +This period is an error.But the periods in a.m. aren't.
    35.6 +**************** EXPECTED ****************
    35.7 +
    35.8 +This period is an error.But the periods in a.m. aren't.
    35.9 +    Line 1 column 45 - Query word m - not reporting duplicates
   35.10 +
   35.11 +This period is an error.But the periods in a.m. aren't.
   35.12 +    Line 1 column 24 - Missing space?
    36.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.2 +++ b/test/compatibility/digits.tst	Tue Jan 24 23:54:05 2012 +0000
    36.3 @@ -0,0 +1,12 @@
    36.4 +**************** INPUT ****************
    36.5 +0K--this'11 make you look close1y.
    36.6 +**************** EXPECTED ****************
    36.7 +
    36.8 +0K--this'11 make you look close1y.
    36.9 +    Line 1 column 1 - Query digit in 0K
   36.10 +
   36.11 +0K--this'11 make you look close1y.
   36.12 +    Line 1 column 3 - Query digit in this'11
   36.13 +
   36.14 +0K--this'11 make you look close1y.
   36.15 +    Line 1 column 26 - Query digit in close1y
    37.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.2 +++ b/test/compatibility/ellipsis.tst	Tue Jan 24 23:54:05 2012 +0000
    37.3 @@ -0,0 +1,7 @@
    37.4 +**************** INPUT ****************
    37.5 +There are some complications . The extra space left around that
    37.6 +period was an error . . . but that ellipsis wasn't.
    37.7 +**************** EXPECTED ****************
    37.8 +
    37.9 +There are some complications . The extra space left around that
   37.10 +    Line 1 column 30 - Spaced punctuation?
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/test/compatibility/example.tst	Tue Jan 24 23:54:05 2012 +0000
    38.3 @@ -0,0 +1,87 @@
    38.4 +**************** INPUT ****************
    38.5 +They saw him distinctly, as with the naked eye; a word, a turn of
    38.6 +the pen, or a word unsaid, offered the picture of him in America,
    38.7 +Japan, China, Australia , nay, the continent of Europe, holding an
    38.8 +English review of his Maker's grotesques. Vernon seemed a
    38.9 +sheepish fellow, without stature abroad, glad of a compliment
   38.10 +, grateful for a dinner, endeavouring sadly to digest all he saw
   38.11 +and heard. But one was a Patterne; tbe other a Whitford. One had
   38.12 +genius; the other pottered after him to he a student. One was the
   38.13 +English gent1eman wherever he went; the other was a new kind of
   38.14 +thing, nondescript, produced in England of late, and not likely
   38.15 +to come to much good himself, or do much good to the country.
   38.16 +
   38.17 +Vernon's dancing in America was capitally described by Willoughby.
   38.18 +"Adieu to our cousins!" the latter wrote on his voyage to Japan.
   38.19 +"I may possibly have had some vogue in their ball-rooms, and in
   38.20 +showing them an English seat on horseback: 1 must resign myself if
   38.21 +I have not been popular among them. I could not sing their
   38.22 +national song--if a congery of states be a nation-- and I must
   38.23 +confess I listened with frigid politeness to their singing of it.
   38.24 +A great people, no doubt. Adieu to them. I have had to tear old
   38.25 +Vernon away. He had serious thoughts of settling, means to
   38.26 +
   38.27 +correspond with some of them. On the whole, forgetting two or
   38.28 +more "traits of insolence~ on the part of his hosts, which he
   38.29 +cited, Willoughby escaped pretty comfortably. The President had
   38.30 +been, consciously or not,uncivil, but one knew his origin! Upon
   38.31 +these interjections, placable flicks of the lionly tail addressed
   38.32 +to Britannia the Ruler, who expected him in some mildish way to
   38.33 +lash terga cauda in retiring, Sir WilIoughby Patterne passed from
   38.34 +a land of alien manners,; and ever after he spoke of America
   38.35 +respectfully aud pensively, with a tail tucked in, as it were. His
   38.36 +travels were profitable to himself. The fact is, that tbere are
   38.37 +cousins who come to greatness and rnust be pacified, or they will
   38.38 +prove annoying. Heaven forefend a collision between cousins!
   38.39 +**************** EXPECTED ****************
   38.40 +
   38.41 +Japan, China, Australia , nay, the continent of Europe, holding an
   38.42 +    Line 3 column 25 - Spaced punctuation?
   38.43 +
   38.44 +, grateful for a dinner, endeavouring sadly to digest all he saw
   38.45 +    Line 6 column 1 - Begins with punctuation?
   38.46 +
   38.47 +and heard. But one was a Patterne; tbe other a Whitford. One had
   38.48 +    Line 7 column 34 - Query word tbe - not reporting duplicates
   38.49 +
   38.50 +genius; the other pottered after him to he a student. One was the
   38.51 +    Line 8 column 37 - Query he/be error?
   38.52 +
   38.53 +English gent1eman wherever he went; the other was a new kind of
   38.54 +    Line 9 column 8 - Query digit in gent1eman
   38.55 +
   38.56 +showing them an English seat on horseback: 1 must resign myself if
   38.57 +    Line 16 column 43 - Query standalone 1
   38.58 +
   38.59 +national song--if a congery of states be a nation-- and I must
   38.60 +    Line 18 column 50 - Spaced em-dash?
   38.61 +
   38.62 +Vernon away. He had serious thoughts of settling, means to
   38.63 +    Line 21 column 58 - No punctuation at para end?
   38.64 +
   38.65 +Vernon's dancing in America was capitally described by Willoughby.
   38.66 +    Line 22 - Mismatched quotes
   38.67 +
   38.68 +correspond with some of them. On the whole, forgetting two or
   38.69 +    Line 23 column 1 - Paragraph starts with lower-case
   38.70 +
   38.71 +more "traits of insolence~ on the part of his hosts, which he
   38.72 +    Line 24 column 26 - Tilde character?
   38.73 +
   38.74 +been, consciously or not,uncivil, but one knew his origin! Upon
   38.75 +    Line 26 column 25 - Missing space?
   38.76 +
   38.77 +lash terga cauda in retiring, Sir WilIoughby Patterne passed from
   38.78 +    Line 29 column 34 - Query word WilIoughby - not reporting duplicates
   38.79 +
   38.80 +a land of alien manners,; and ever after he spoke of America
   38.81 +    Line 30 column 24 - Double punctuation?
   38.82 +
   38.83 +respectfully aud pensively, with a tail tucked in, as it were. His
   38.84 +    Line 31 column 13 - Query word aud - not reporting duplicates
   38.85 +
   38.86 +travels were profitable to himself. The fact is, that tbere are
   38.87 +    Line 32 column 54 - Query word tbere - not reporting duplicates
   38.88 +
   38.89 +cousins who come to greatness and rnust be pacified, or they will
   38.90 +    Line 33 column 34 - Query word rnust - not reporting duplicates
    39.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.2 +++ b/test/compatibility/extra-period.tst	Tue Jan 24 23:54:05 2012 +0000
    39.3 @@ -0,0 +1,6 @@
    39.4 +**************** INPUT ****************
    39.5 +"If you do. you'll regret it!"
    39.6 +**************** EXPECTED ****************
    39.7 +
    39.8 +"If you do. you'll regret it!"
    39.9 +    Line 1 column 11 - Extra period?
    40.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.2 +++ b/test/compatibility/he-be.tst	Tue Jan 24 23:54:05 2012 +0000
    40.3 @@ -0,0 +1,6 @@
    40.4 +**************** INPUT ****************
    40.5 +The horse is said to he worth a lot.
    40.6 +**************** EXPECTED ****************
    40.7 +
    40.8 +The horse is said to he worth a lot.
    40.9 +    Line 1 column 18 - Query he/be error?
    41.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.2 +++ b/test/compatibility/html-symbol.tst	Tue Jan 24 23:54:05 2012 +0000
    41.3 @@ -0,0 +1,6 @@
    41.4 +**************** INPUT ****************
    41.5 +&So;
    41.6 +**************** EXPECTED ****************
    41.7 +
    41.8 +&So;
    41.9 +    Line 1 column 1 - HTML symbol? &So; 
    42.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    42.2 +++ b/test/compatibility/html-tag.tst	Tue Jan 24 23:54:05 2012 +0000
    42.3 @@ -0,0 +1,6 @@
    42.4 +**************** INPUT ****************
    42.5 +<This is a tag>
    42.6 +**************** EXPECTED ****************
    42.7 +
    42.8 +<This is a tag>
    42.9 +    Line 1 column 1 - HTML Tag? <This is a tag> 
    43.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    43.2 +++ b/test/compatibility/mismatched-quotes.tst	Tue Jan 24 23:54:05 2012 +0000
    43.3 @@ -0,0 +1,8 @@
    43.4 +**************** INPUT ****************
    43.5 +Margaret said: "Now you should start for school.
    43.6 +
    43.7 +New paragraph.
    43.8 +**************** EXPECTED ****************
    43.9 +
   43.10 +Margaret said: "Now you should start for school.
   43.11 +    Line 2 - Mismatched quotes
    44.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    44.2 +++ b/test/compatibility/missing-space.tst	Tue Jan 24 23:54:05 2012 +0000
    44.3 @@ -0,0 +1,6 @@
    44.4 +**************** INPUT ****************
    44.5 +"Look!John, over there!"
    44.6 +**************** EXPECTED ****************
    44.7 +
    44.8 +"Look!John, over there!"
    44.9 +    Line 1 column 6 - Missing space?
    45.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.2 +++ b/test/compatibility/short-line.tst	Tue Jan 24 23:54:05 2012 +0000
    45.3 @@ -0,0 +1,15 @@
    45.4 +**************** INPUT ****************
    45.5 +The second line of a paragraph isn't usually short at all
    45.6 +and
    45.7 +should be flagged as a warning by gutcheck as long as there
    45.8 +are sufficient numbers of lines in the file to stop it deciding
    45.9 +that there are too many short lines to bother reporting, which
   45.10 +means that I have to waffle on until we have at least 10 lines
   45.11 +of text.
   45.12 +
   45.13 +The last line of a paragraph
   45.14 +is usually short.
   45.15 +**************** EXPECTED ****************
   45.16 +
   45.17 +and
   45.18 +    Line 2 column 3 - Short line 3?
    46.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.2 +++ b/test/compatibility/spaced-doublequote.tst	Tue Jan 24 23:54:05 2012 +0000
    46.3 @@ -0,0 +1,9 @@
    46.4 +**************** INPUT ****************
    46.5 +Margaret said: " Now you should start for school."
    46.6 +**************** EXPECTED ****************
    46.7 +
    46.8 +Margaret said: " Now you should start for school."
    46.9 +    Line 1 column 16 - Wrongspaced quotes?
   46.10 +
   46.11 +Margaret said: " Now you should start for school."
   46.12 +    Line 1 column 15 - Spaced doublequote?
    47.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    47.2 +++ b/test/compatibility/spaced-punctuation.tst	Tue Jan 24 23:54:05 2012 +0000
    47.3 @@ -0,0 +1,6 @@
    47.4 +**************** INPUT ****************
    47.5 +"Look! John , over there!"
    47.6 +**************** EXPECTED ****************
    47.7 +
    47.8 +"Look! John , over there!"
    47.9 +    Line 1 column 13 - Spaced punctuation?
    48.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.2 +++ b/test/harness/Makefile.am	Tue Jan 24 23:54:05 2012 +0000
    48.3 @@ -0,0 +1,8 @@
    48.4 +INCLUDES=-I$(top_srcdir)
    48.5 +bin_PROGRAMS=gc-test
    48.6 +AM_CFLAGS=$(GLIB_CFLAGS)
    48.7 +LIBS=$(GLIB_LIBS)
    48.8 +
    48.9 +gc_test_SOURCES=gc-test.c testcase.c testcase.h testcaseio.c testcaseio.h \
   48.10 +	testcaseparser.c testcaseparser.h
   48.11 +gc_test_LDADD=../../gclib/libgc.la
    49.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.2 +++ b/test/harness/gc-test.c	Tue Jan 24 23:54:05 2012 +0000
    49.3 @@ -0,0 +1,31 @@
    49.4 +#include <stdlib.h>
    49.5 +#include <stdio.h>
    49.6 +#include <string.h>
    49.7 +#include <gclib/gclib.h>
    49.8 +#include "testcase.h"
    49.9 +#include "testcaseio.h"
   49.10 +
   49.11 +/*
   49.12 + * Parse the testcase held in filename, run it and release it again.
   49.13 + * Returns FALSE if parsing or the run fails, TRUE when the run succeeds.
   49.14 + */
   49.15 +boolean run_test(const char *filename)
   49.16 +{
   49.17 +    boolean passed=FALSE;
   49.18 +    Testcase *tc=testcase_parse_file(filename);
   49.19 +    if (tc)
   49.20 +    {
   49.21 +	passed=testcase_run(tc);
   49.22 +	testcase_free(tc);
   49.23 +    }
   49.24 +    return passed;
   49.25 +}
   49.26 +
   49.27 +int main(int argc,char **argv)
   49.28 +{
   49.29 +    int arg=1;
   49.30 +    boolean ok=TRUE;	/* cleared as soon as one testcase fails */
   49.31 +    while(arg<argc)	/* every argument names a testcase file */
   49.32 +	ok&=run_test(argv[arg++]);
   49.33 +    return ok?0:1;
   49.34 +}
    50.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    50.2 +++ b/test/harness/testcase.c	Tue Jan 24 23:54:05 2012 +0000
    50.3 @@ -0,0 +1,203 @@
    50.4 +#include <stdlib.h>
    50.5 +#include <stdio.h>
    50.6 +#include <string.h>
    50.7 +#include <unistd.h>
    50.8 +#include <errno.h>
    50.9 +#ifdef WIN32
   50.10 +#include <io.h>
   50.11 +#include <fcntl.h>
   50.12 +#endif
   50.13 +#include <gclib/gclib.h>
   50.14 +#include "testcase.h"
   50.15 +
   50.16 +#if !HAVE_MKSTEMP
   50.17 +/*
   50.18 + * An insecure implementation of mkstemp(), for those platforms that
   50.19 + * don't support it. Returns an open descriptor, or -1 with errno set.
   50.20 + * Only a name clash (EEXIST) is retried; other open() errors are returned.
   50.21 + */
   50.22 +int mkstemp(char *template)
   50.23 +{
   50.24 +    int fd,err;
   50.25 +    char *s;
   50.26 +    for(;;)
   50.27 +    {
   50.28 +	s=str_dup(template);	/* work on a copy so template survives retries */
   50.29 +	mktemp(s);
   50.30 +	if (!*s)		/* mktemp() leaves an empty string when it fails */
   50.31 +	{
   50.32 +	    mem_free(s);
   50.33 +	    errno=EEXIST;
   50.34 +	    return -1;
   50.35 +	}
   50.36 +	fd=open(s,O_RDWR|O_CREAT|O_EXCL,0600);
   50.37 +	err=errno;		/* saved in case mem_free() disturbs errno */
   50.38 +	if (fd>=0)		/* descriptor 0 is a valid result too */
   50.39 +	    strcpy(template,s);
   50.40 +	mem_free(s);
   50.41 +	errno=err;
   50.42 +	if (fd>=0 || err!=EEXIST)
   50.43 +	    return fd;		/* success, or an error that retrying cannot fix */
   50.44 +    }
   50.45 +}
   50.46 +#endif	/* !HAVE_MKSTEMP */
   50.47 +
   50.48 +/*
   50.49 + * As write(), but always convert NL to CR NL.
   50.50 + * Writes through a stdio stream on a duplicate of fd, so fd stays open.
   50.51 + * Returns count on success and -1 (converted to size_t) on any failure.
   50.52 + */
   50.53 +static size_t write_text(int fd,const char *buf,size_t count)
   50.54 +{
   50.55 +    const char *p=buf;
   50.56 +    size_t left;
   50.57 +    FILE *out;
   50.58 +    int copy=dup(fd);
   50.59 +    if (copy<0)
   50.60 +	return -1;
   50.61 +#ifdef WIN32
   50.62 +    /* Binary mode: presumably so the runtime adds no CRs of its own. */
   50.63 +    if (_setmode(copy,_O_BINARY)<0)
   50.64 +    {
   50.65 +	close(copy);
   50.66 +	return -1;
   50.67 +    }
   50.68 +#endif
   50.69 +    out=fdopen(copy,"wb");
   50.70 +    if (!out)
   50.71 +    {
   50.72 +	close(copy);
   50.73 +	return -1;
   50.74 +    }
   50.75 +    for(left=count;left;left--,p++)
   50.76 +    {
   50.77 +	/* The CR goes out first; a failure of either putc() aborts. */
   50.78 +	if ((*p=='\n' && putc('\r',out)==EOF) || putc(*p,out)==EOF)
   50.79 +	{
   50.80 +	    (void)fclose(out);
   50.81 +	    return -1;
   50.82 +	}
   50.83 +    }
   50.84 +    /* fclose() flushes the stream, so its result matters as well. */
   50.85 +    if (fclose(out))
   50.86 +	return -1;
   50.87 +    return count;
   50.88 +}
   50.89 +
   50.90 +/*
   50.91 + * Return the length (in bytes) of the longest prefix shared by s1 and s2.
   50.92 + */
   50.93 +size_t common_prefix_length(const char *s1,const char *s2)
   50.94 +{
   50.95 +    const char *p=s1,*q=s2;
   50.96 +    while(*p && *p==*q)	/* equal and non-NUL implies *q is non-NUL too */
   50.97 +	p++,q++;
   50.98 +    return p-s1;
   50.99 +}
  50.100 +
  50.101 +/*
  50.102 + * Run a testcase: write its input to a temporary file, run gutcheck
  50.103 + * ($GUTCHECK, or ./gutcheck) on it and compare any expected output.
  50.104 + * Returns TRUE only if the output matched (when checked) and gutcheck
  50.105 + * exited with status 0. FAIL/PASS lines and diagnostics go to stderr.
  50.106 + */
  50.107 +boolean testcase_run(Testcase *testcase)
  50.108 +{
  50.109 +    boolean r;
  50.110 +    int fd,exit_status,col;
  50.111 +    size_t n,offset,header_len=0;
  50.112 +    char input[]="TEST-XXXXXX";
  50.113 +    char *endp,*bol;
  50.114 +    char *command[3];
  50.115 +    String *expected=NULL,*report;
  50.116 +    char *output=NULL;
  50.117 +    fd=mkstemp(input);
  50.118 +    if (fd<0)
  50.119 +    {
  50.120 +	perror(input);
  50.121 +	return FALSE;
  50.122 +    }
  50.123 +    /* A testcase without input text simply leaves the file empty. */
  50.124 +    n=testcase->input?strlen(testcase->input):0;
  50.125 +    if (n && write_text(fd,testcase->input,n)!=n)
  50.126 +    {
  50.127 +	perror(input);
  50.128 +	close(fd);
  50.129 +	(void)remove(input);
  50.130 +	return FALSE;
  50.131 +    }
  50.132 +    close(fd);
  50.133 +    command[0]=getenv("GUTCHECK");
  50.134 +    if (!command[0])
  50.135 +	command[0]="." GC_DIR_SEPARATOR_S "gutcheck";
  50.136 +    command[1]=input;
  50.137 +    command[2]=NULL;
  50.138 +    /* Only capture gutcheck's output when there is something to compare. */
  50.139 +    r=spawn_sync(command,testcase->expected?&output:NULL,&exit_status);
  50.140 +    /* The temporary file is no longer needed once gutcheck has run. */
  50.141 +    (void)remove(input);
  50.142 +    if (!r)
  50.143 +	return FALSE;
  50.144 +    if (testcase->expected)
  50.145 +    {
  50.146 +	/* The output is expected to start with a header naming the file. */
  50.147 +	expected=string_new("\n\nFile: ");
  50.148 +	string_append(expected,input);
  50.149 +	string_append(expected,"\n\n\n");
  50.150 +	header_len=expected->len;
  50.151 +	string_append(expected,testcase->expected);
  50.152 +    }
  50.153 +    if (expected && strcmp(output,expected->str))
  50.154 +    {
  50.155 +	fprintf(stderr,"%s: FAIL\n",testcase->basename);
  50.156 +	offset=common_prefix_length(output,expected->str);
  50.157 +	if (offset==header_len && !output[offset])
  50.158 +	    fprintf(stderr,"Unexpected zero warnings from gutcheck.\n");
  50.159 +	else
  50.160 +	{
  50.161 +	    /* Show the output up to the end of the first differing line. */
  50.162 +	    endp=strchr(output+offset,'\n');
  50.163 +	    if (!endp)
  50.164 +		endp=output+strlen(output);
  50.165 +	    report=string_new(NULL);
  50.166 +	    string_append_len(report,output,endp-output);
  50.167 +	    bol=strrchr(report->str,'\n');
  50.168 +	    if (bol)
  50.169 +		bol++;
  50.170 +	    else
  50.171 +		bol=report->str;
  50.172 +	    /* Column of the first difference, used to place the caret. */
  50.173 +	    col=offset-(bol-report->str);
  50.174 +	    fprintf(stderr,"Unexpected output from gutcheck:\n");
  50.175 +	    /* Leave the header out when the report extends beyond it. */
  50.176 +	    if (report->len>=header_len)
  50.177 +		fprintf(stderr,"%s\n%*s^\n",report->str+header_len,col,"");
  50.178 +	    else
  50.179 +		fprintf(stderr,"%s\n%*s^\n",report->str,col,"");
  50.180 +	    string_free(report,TRUE);
  50.181 +	}
  50.182 +	string_free(expected,TRUE);
  50.183 +	mem_free(output);
  50.184 +	return FALSE;
  50.185 +    }
  50.186 +    if (expected)
  50.187 +	string_free(expected,TRUE);
  50.188 +    mem_free(output);
  50.189 +    /* Output matched (or was not checked): the exit status decides. */
  50.190 +    if (exit_status)
  50.191 +	fprintf(stderr,"gutcheck exited with code %d\n",exit_status);
  50.192 +    else
  50.193 +	fprintf(stderr,"%s: PASS\n",testcase->basename);
  50.194 +    return !exit_status;
  50.195 +}
  50.196 +
  50.197 +/* Free a testcase together with its basename, input and expected strings.
  50.198 + * testcase is dereferenced unconditionally, so it must not be NULL.
  50.199 + */
  50.200 +void testcase_free(Testcase *testcase)
  50.201 +{
  50.202 +    mem_free(testcase->basename);
  50.203 +    mem_free(testcase->input);
  50.204 +    mem_free(testcase->expected);
  50.205 +    mem_free(testcase);	/* last: the pointers above are read from it */
  50.206 +}
    51.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    51.2 +++ b/test/harness/testcase.h	Tue Jan 24 23:54:05 2012 +0000
    51.3 @@ -0,0 +1,16 @@
    51.4 +#ifndef TESTCASE_H
    51.5 +#define TESTCASE_H
    51.6 +
    51.7 +typedef struct {	/* one parsed testcase file */
    51.8 +    char *basename;	/* basename of the file, cut at its last '.' */
    51.9 +    char *input;	/* text of the INPUT section; may be NULL */
   51.10 +    char *expected;	/* text of the EXPECTED section; may be NULL */
   51.11 +    enum {
   51.12 +	TESTCASE_XFAIL=1<<0,	/* presumably "expected to fail" -- TODO confirm */
   51.13 +    } flags;
   51.14 +} Testcase;
   51.15 +
   51.16 +boolean testcase_run(Testcase *testcase);
   51.17 +void testcase_free(Testcase *testcase);
   51.18 +
   51.19 +#endif	/* TESTCASE_H */
    52.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    52.2 +++ b/test/harness/testcaseio.c	Tue Jan 24 23:54:05 2012 +0000
    52.3 @@ -0,0 +1,63 @@
    52.4 +#include <stdlib.h>
    52.5 +#include <stdio.h>
    52.6 +#include <string.h>
    52.7 +#include <gclib/gclib.h>
    52.8 +#include "testcaseparser.h"
    52.9 +#include "testcaseio.h"
   52.10 +
   52.11 +/*
   52.12 + * Load the testcase stored in filename. The contents must begin with a
   52.13 + * non-empty flag string and hold at most one INPUT and one EXPECTED tag.
   52.14 + * On error, a message is printed on stderr and NULL is returned.
   52.15 + * The caller owns the result and releases it with testcase_free().
   52.16 + */
   52.17 +Testcase *testcase_parse_file(const char *filename)
   52.18 +{
   52.19 +    Testcase *tc=NULL;
   52.20 +    TestcaseParser *parser;
   52.21 +    char *dot;
   52.22 +    const char *tag,*text;
   52.23 +    boolean any_tag=FALSE;
   52.24 +    parser=testcase_parser_new_from_file(filename);
   52.25 +    if (!parser)
   52.26 +	return NULL;
   52.27 +    if (!*testcase_parser_get_flag(parser))
   52.28 +    {
   52.29 +	fprintf(stderr,"%s: Not a valid testcase (flag)\n",filename);
   52.30 +	goto invalid;
   52.31 +    }
   52.32 +    tc=mem_new0(Testcase,1);
   52.33 +    /* The testcase is named after the file, minus its final extension. */
   52.34 +    tc->basename=path_get_basename(filename);
   52.35 +    dot=strrchr(tc->basename,'.');
   52.36 +    if (dot)
   52.37 +	*dot='\0';
   52.38 +    while(testcase_parser_get_next_tag(parser,&tag,&text))
   52.39 +    {
   52.40 +	/* Each section may appear once; anything else is rejected. */
   52.41 +	if (!tc->input && !strcmp(tag,"INPUT"))
   52.42 +	    tc->input=str_dup(text);
   52.43 +	else if (!tc->expected && !strcmp(tag,"EXPECTED"))
   52.44 +	    tc->expected=str_dup(text);
   52.45 +	else
   52.46 +	{
   52.47 +	    fprintf(stderr,"%s: Not a valid testcase (%s)\n",filename,tag);
   52.48 +	    goto invalid;
   52.49 +	}
   52.50 +	any_tag=TRUE;
   52.51 +    }
   52.52 +    if (testcase_parser_at_eof(parser))
   52.53 +    {
   52.54 +	testcase_parser_free(parser);
   52.55 +	return tc;
   52.56 +    }
   52.57 +    if (any_tag)
   52.58 +	fprintf(stderr,"%s: Not a valid testcase (garbage at end)\n",filename);
   52.59 +    else
   52.60 +	fprintf(stderr,"%s: Not a valid testcase (no valid tags)\n",filename);
   52.61 +invalid:
   52.62 +    if (tc)
   52.63 +	testcase_free(tc);
   52.64 +    testcase_parser_free(parser);
   52.65 +    return NULL;
   52.66 +}
    53.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    53.2 +++ b/test/harness/testcaseio.h	Tue Jan 24 23:54:05 2012 +0000
    53.3 @@ -0,0 +1,8 @@
    53.4 +#ifndef TESTCASE_IO_H
    53.5 +#define TESTCASE_IO_H
    53.6 +
    53.7 +#include "testcase.h"
    53.8 +
    53.9 +Testcase *testcase_parse_file(const char *filename);
   53.10 +
   53.11 +#endif	/* TESTCASE_IO_H */
    54.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    54.2 +++ b/test/harness/testcaseparser.c	Tue Jan 24 23:54:05 2012 +0000
    54.3 @@ -0,0 +1,115 @@
    54.4 +#include <stdlib.h>
    54.5 +#include <stdio.h>
    54.6 +#include <string.h>
    54.7 +#include <ctype.h>
    54.8 +#include <gclib/gclib.h>
    54.9 +#include "testcaseparser.h"
   54.10 +
   54.11 +/*
   54.12 + * Get the flag (the string of characters which bracket tags in test cases).
   54.13 + * It is the leading run of printable non-space ASCII, cached after first use.
   54.14 + */
   54.15 +const char *testcase_parser_get_flag(TestcaseParser *parser)
   54.16 +{
   54.17 +    const char *p;
   54.18 +    if (parser->flag)
   54.19 +	return parser->flag->str;
   54.20 +    parser->flag=string_new(NULL);
   54.21 +    for(p=parser->contents;*p>' ' && *p<='~';p++)
   54.22 +	string_append_c(parser->flag,*p);
   54.23 +    return parser->flag->str;
   54.24 +}
   54.25 +
   54.26 +/* Test if the parser has reached the end of the input file, that is,
   54.27 + * whether the byte at the current position is the terminating NUL.
   54.28 + */
   54.29 +boolean testcase_parser_at_eof(TestcaseParser *parser)
   54.30 +{
   54.31 +    return !parser->contents[parser->pos];
   54.32 +}
   54.33 +
   54.34 +/*
   54.35 + * Get the next tag (and its associated text, if any) from a test case.
   54.36 + * *tag and *text receive parser-owned strings, replaced by the next call.
   54.37 + * Returns: TRUE if successful and FALSE if no more valid tags are present.
   54.38 + * When it returns FALSE, callers can use testcase_parser_at_eof() to
   54.39 + * distinguish EOF and text which isn't a valid tag.
   54.40 + */
   54.41 +boolean testcase_parser_get_next_tag(TestcaseParser *parser,const char **tag,
   54.42 +  const char **text)
   54.43 +{
   54.44 +    size_t n,flag_len;
   54.45 +    char *line,*eol,*start,*endp;
   54.46 +    String *string;
   54.47 +    mem_free(parser->tag);
   54.48 +    parser->tag=NULL;
   54.49 +    mem_free(parser->tag_text);
   54.50 +    parser->tag_text=NULL;
   54.51 +    (void)testcase_parser_get_flag(parser);	/* makes sure parser->flag is set */
   54.52 +    flag_len=parser->flag->len;
   54.53 +    line=parser->contents+parser->pos;
   54.54 +    if (strncmp(line,parser->flag->str,flag_len))
   54.55 +	return FALSE;
   54.56 +    eol=strchr(line,'\n');
   54.57 +    /* The opening and closing flags must not overlap on the tag line. */
   54.58 +    if (!eol || (size_t)(eol-line)<2*flag_len)
   54.59 +	return FALSE;
   54.60 +    endp=eol-flag_len;
   54.61 +    if (strncmp(endp,parser->flag->str,flag_len))
   54.62 +	return FALSE;
   54.63 +    start=line+flag_len;	/* the tag name lies in [start,endp) */
   54.64 +    while(endp>start && isspace((unsigned char)endp[-1]))
   54.65 +	endp--;
   54.66 +    while(start<endp && isspace((unsigned char)*start))
   54.67 +	start++;
   54.68 +    parser->tag=str_ndup(start,endp-start);
   54.69 +    parser->pos=eol-parser->contents+1;
   54.70 +    string=string_new(NULL);
   54.71 +    while (!testcase_parser_at_eof(parser) &&
   54.72 +      strncmp(parser->contents+parser->pos,parser->flag->str,flag_len))
   54.73 +    {
   54.74 +	line=parser->contents+parser->pos;
   54.75 +	eol=strchr(line,'\n');	/* the last line may lack a newline */
   54.76 +	n=eol?(size_t)(eol-line)+1:strlen(line);
   54.77 +	string_append_len(string,line,n);
   54.78 +	parser->pos+=n;
   54.79 +    }
   54.80 +    parser->tag_text=string_free(string,FALSE);
   54.81 +    if (!parser->tag_text)
   54.82 +	parser->tag_text=str_dup("");
   54.83 +    if (tag)
   54.84 +	*tag=parser->tag;
   54.85 +    if (text)
   54.86 +	*text=parser->tag_text;
   54.87 +    return TRUE;
   54.88 +}
   54.89 +
   54.90 +/*
   54.91 + * Create a testcase parser to read a regular file; its contents are read by
   54.92 + * file_get_contents_text(). Returns NULL if that call reports failure.
   54.93 + */
   54.94 +TestcaseParser *testcase_parser_new_from_file(const char *filename)
   54.95 +{
   54.96 +    TestcaseParser *parser=mem_new0(TestcaseParser,1);
   54.97 +    if (file_get_contents_text(filename,&parser->contents,NULL))
   54.98 +    {
   54.99 +	parser->filename=str_dup(filename);
  54.100 +	return parser;
  54.101 +    }
  54.102 +    mem_free(parser);
  54.103 +    return NULL;
  54.104 +}
  54.105 +
  54.106 +/*
  54.107 + * Free a testcase parser and everything it owns. parser must not be NULL.
  54.108 + */
  54.109 +void testcase_parser_free(TestcaseParser *parser)
  54.110 +{
  54.111 +    mem_free(parser->filename);
  54.112 +    mem_free(parser->contents);
  54.113 +    if (parser->flag)	/* NULL until the flag has been requested */
  54.114 +	string_free(parser->flag,TRUE);
  54.115 +    mem_free(parser->tag);	/* strings handed out by get_next_tag() */
  54.116 +    mem_free(parser->tag_text);
  54.117 +    mem_free(parser);
  54.118 +}
    55.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    55.2 +++ b/test/harness/testcaseparser.h	Tue Jan 24 23:54:05 2012 +0000
    55.3 @@ -0,0 +1,22 @@
    55.4 +#ifndef TESTCASE_PARSER_H
    55.5 +#define TESTCASE_PARSER_H
    55.6 +
    55.7 +#include <gclib/gclib.h>
    55.8 +
    55.9 +typedef struct {	/* state for reading tags out of one testcase file */
   55.10 +    char *filename;	/* copy of the name given to testcase_parser_new_from_file() */
   55.11 +    char *contents;	/* file contents, scanned as a NUL-terminated string */
   55.12 +    String *flag;	/* string bracketing tags; NULL until first requested */
   55.13 +    size_t pos;		/* offset into contents where parsing resumes */
   55.14 +    char *tag;		/* name of the most recently returned tag, or NULL */
   55.15 +    char *tag_text;	/* text belonging to that tag, or NULL */
   55.16 +} TestcaseParser;
   55.17 +
   55.18 +const char *testcase_parser_get_flag(TestcaseParser *parser);
   55.19 +boolean testcase_parser_get_next_tag(TestcaseParser *parser,const char **tag,
   55.20 +  const char **text);
   55.21 +boolean testcase_parser_at_eof(TestcaseParser *parser);
   55.22 +TestcaseParser *testcase_parser_new_from_file(const char *filename);
   55.23 +void testcase_parser_free(TestcaseParser *parser);
   55.24 +
   55.25 +#endif	/* TESTCASE_PARSER_H */