bookloupe/bookloupe.c
author ali <ali@juiblex.co.uk>
Sat May 25 09:40:56 2013 +0100 (2013-05-25)
changeset 42 20d51419e077
parent 41 68b1403e2971
child 43 e4042a067753
permissions -rw-r--r--
Break report_first_pass() out
     1 /*************************************************************************/
     2 /* bookloupe--check for assorted weirdnesses in a PG candidate text file */
     3 /*                                                                       */
     4 /* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>                  */
     5 /* Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>                     */
     6 /*                                                                       */
     7 /* This program is free software; you can redistribute it and/or modify  */
     8 /* it under the terms of the GNU General Public License as published by  */
     9 /* the Free Software Foundation; either version 2 of the License, or     */
    10 /* (at your option) any later version.                                   */
    11 /*                                                                       */
    12 /* This program is distributed in the hope that it will be useful,       */
    13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of        */
    14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          */
    15 /* GNU General Public License for more details.                          */
    16 /*                                                                       */
    17 /* You should have received a copy of the GNU General Public License     */
    18 /* along with this program. If not, see <http://www.gnu.org/licenses/>.  */
    19 /*************************************************************************/
    20 
    21 #include <stdio.h>
    22 #include <stdlib.h>
    23 #include <string.h>
    24 #include <ctype.h>
    25 
    26 #define MAXWORDLEN    80    /* max length of one word             */
    27 #define LINEBUFSIZE 2048    /* buffer size for an input line      */
    28 
    29 #define MAX_USER_TYPOS 1000
    30 #define USERTYPO_FILE "gutcheck.typ"
    31 
    32 #ifndef MAX_PATH
    33 #define MAX_PATH 16384
    34 #endif
    35 
/*
 * Shared input-line buffer: first_pass() reads each line of the text into
 * it with fgets(), and main() reads the user typo file through it.
 */
char aline[LINEBUFSIZE];
/* NOTE(review): presumably the previously read line; its use is outside this view -- confirm. */
char prevline[LINEBUFSIZE];
    38 
/*
 * Common typos.
 * All entries are lower case and the final entry is an empty string
 * (presumably the end-of-list marker; the lookup code is outside this view).
 */
char *typo[] = {
    "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane",
    "nad", "te", "ig", "acn",  "ahve", "alot", "anbd", "andt", "awya", "aywa",
    "bakc", "om", "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt",
    "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr", "hmi", "hse",
    "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd",
    "gerat", "goign", "gruop", "haev", "hda", "hearign", "seeign", "sayign",
    "herat", "hge", "hsa", "hsi", "hte", "htere", "htese", "htey", "htis",
    "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut",
    "loev", "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter",
    "omre", "onyl", "otehr", "otu", "owrk", "owuld", "peice", "peices",
    "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",
    "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe",
    "sohw", "stnad", "stopry", "stoyr", "stpo", "tahn", "taht", "tath",
    "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge", "thier",
    "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne",
    "tirne", "tkae", "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey",
    "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih", "whihc", "whta",
    "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",
    "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking",
    "wtih", "wuould", "wya", "yera", "yeras", "yersa", "yoiu", "youve",
    "ytou", "yuor", "abead", "ahle", "ahout", "ahove", "altbough", "balf",
    "bardly", "bas", "bave", "baving", "bebind", "beld", "belp", "belped",
    "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge",
    "dehates", "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan",
    "hegin", "heing", "helieve", "henefit", "hetter", "hetween", "heyond",
    "hig", "higber", "huild", "huy", "hy", "jobn", "joh", "meanwbile",
    "memher", "memhers", "numher", "numhers", "perbaps", "prohlem", "puhlic",
    "witbout", "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud",
    "prornise", "prornised", "modem", "bo", "heside", "chapteb", "chaptee",
    "se", ""
};
    72 
/*
 * User-defined typos. main() fills this with malloc'd copies of the lines
 * read from USERTYPO_FILE, storing at most MAX_USER_TYPOS entries and
 * keeping the number stored in usertypo_count.
 */
char *usertypo[MAX_USER_TYPOS];
    74 
/*
 * Common abbreviations and other OK words not to query as typos.
 * Entries are lower case; the final entry is an empty string (presumably
 * the end-of-list marker; the lookup code is outside this view).
 */
char *okword[] = {
    "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm",
    "rd", "sh", "br", "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd",
    "pompeii","hawaii","hawaiian", "hotbed", "heartbeat", "heartbeats",
    "outbid", "outbids", "frostbite", "frostbitten", ""
};
    82 
/*
 * Common abbreviations that cause otherwise unexplained periods.
 * Entries are lower case and given without the period; the final entry
 * is an empty string (presumably the end-of-list marker).
 */
char *abbrev[] = {
    "cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op",
    "cit", "deg", "min", "chap", "oz", "mme", "mlle", "mssrs", ""
};
    88 
/*
 * Two-Letter combinations that rarely if ever start words,
 * but are common scannos or otherwise common letter combinations.
 * The final entry is an empty string (presumably the end-of-list marker).
 */
char *nostart[] = {
    "hr", "hl", "cb", "sb", "tb", "wb", "tl", "tn", "rn", "lt", "tj", ""
};
    96 
/*
 * Two-Letter combinations that rarely if ever end words,
 * but are common scannos or otherwise common letter combinations.
 * The final entry is an empty string (presumably the end-of-list marker).
 */
char *noend[] = {
    "cb", "gb", "pb", "sb", "tb", "wh", "fr", "br", "qu", "tw", "gl", "fl",
    "sw", "gr", "sl", "cl", "iy", ""
};
   105 
/*
 * Lower-case HTML element names, without angle brackets, in alphabetical
 * order; the final entry is an empty string.
 * NOTE(review): presumably the tags recognised by the markup checks
 * (linehasmarkup/losemarkup/tagcomp are declared below) -- confirm.
 */
char *markup[] = {
    "a", "b", "big", "blockquote", "body", "br", "center", "col", "div", "em",
    "font", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i",
    "img", "li", "meta", "ol", "p", "pre", "small", "span", "strong", "sub",
    "sup", "table", "td", "tfoot", "thead", "title", "tr", "tt", "u", "ul", ""
};
   112 
/*
 * Literal markup strings, stored complete with their delimiters; the
 * final entry is an empty string.
 * NOTE(review): presumably the "DP-specific markup" that the D switch
 * asks to have ignored -- confirm against postprocess_for_DP().
 */
char *DPmarkup[] = {
    "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>", ""
};
   116 
   117 char *nocomma[] = {
   118     "the", "it's", "their", "an", "mrs", "a", "our", "that's", "its", "whose",
   119     "every", "i'll", "your", "my", "mr", "mrs", "mss", "mssrs", "ft", "pm",
   120     "st", "dr", "rd", "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "i'm",
   121     "during", "let", "toward", "among", ""
   122 };
   123 
/*
 * Lower-case words grouped under the name "noperiod"; the final entry is
 * an empty string.
 * NOTE(review): presumably words that are queried when directly followed
 * by a period -- the code that consults this list is outside this view.
 */
char *noperiod[] = {
    "every", "i'm", "during", "that's", "their", "your", "our", "my", "or",
    "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether",
    "i'll", "whose", "who", "because", "when", "let", "till", "very", "an",
    "among", "those", "into", "whom", "having", "thence", ""
}; 
   130 
/*
 * Lower-case vowels: the five plain letters followed by accented forms
 * and the ae ligature.
 * NOTE(review): the non-ASCII letters are raw bytes in whatever encoding
 * this source file is saved in; confirm that it matches the encoding of
 * the texts being checked.
 */
char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü";
   132 
   133 struct {
   134     char *htmlent;
   135     char *htmlnum;
   136     char *textent;
   137 } entities[] = {
   138     "&amp;",	"&#38;",     "&", 
   139     "&lt;",	"&#60;",     "<",
   140     "&gt;",	"&#62;",     ">",
   141     "&deg;",	"&#176;",    " degrees",
   142     "&pound;",	"&#163;",    "L",
   143     "&quot;",	"&#34;",     "\"", /* quotation mark = APL quote */
   144     "&OElig;",	"&#338;",    "OE", /* latin capital ligature OE */
   145     "&oelig;",	"&#339;",    "oe", /* latin small ligature oe */
   146     "&Scaron;",	"&#352;",    "S", /* latin capital letter S with caron */
   147     "&scaron;",	"&#353;",    "s", /* latin small letter s with caron */
   148     "&Yuml;",	"&#376;",    "Y", /* latin capital letter Y with diaeresis */
   149     "&circ;",	"&#710;",    "",  /* modifier letter circumflex accent */
   150     "&tilde;",	"&#732;",    "~", /* small tilde, U+02DC ISOdia */
   151     "&ensp;",	"&#8194;",   " ", /* en space, U+2002 ISOpub */
   152     "&emsp;",	"&#8195;",   " ", /* em space, U+2003 ISOpub */
   153     "&thinsp;",	"&#8201;",   " ", /* thin space, U+2009 ISOpub */
   154     "&ndash;",	"&#8211;",   "-", /* en dash, U+2013 ISOpub */
   155     "&mdash;",	"&#8212;",   "--", /* em dash, U+2014 ISOpub */
   156     "&rsquo;",	"&#8217;",   "'", /* right single quotation mark */
   157     "&sbquo;",	"&#8218;",   "'", /* single low-9 quotation mark */
   158     "&ldquo;",	"&#8220;",   "\"", /* left double quotation mark */
   159     "&rdquo;",	"&#8221;",   "\"", /* right double quotation mark */
   160     "&bdquo;",	"&#8222;",   "\"", /* double low-9 quotation mark */
   161     "&lsaquo;",	"&#8249;",   "\"", /* single left-pointing angle quotation mark */
   162     "&rsaquo;",	"&#8250;",   "\"", /* single right-pointing angle quotation mark */
   163     "&nbsp;",	"&#160;",    " ", /* no-break space = non-breaking space, */
   164     "&iexcl;",	"&#161;",    "!", /* inverted exclamation mark */
   165     "&cent;",	"&#162;",    "c", /* cent sign */
   166     "&pound;",	"&#163;",    "L", /* pound sign */
   167     "&curren;",	"&#164;",    "$", /* currency sign */
   168     "&yen;",	"&#165;",    "Y", /* yen sign = yuan sign */
   169     "&sect;",	"&#167;",    "--", /* section sign */
   170     "&uml;",	"&#168;",    " ", /* diaeresis = spacing diaeresis */
   171     "&copy;",	"&#169;",    "(C) ", /* copyright sign */
   172     "&ordf;",	"&#170;",    " ", /* feminine ordinal indicator */
   173     "&laquo;",	"&#171;",    "\"", /* left-pointing double angle quotation mark */
   174     "&shy;",	"&#173;",    "-", /* soft hyphen = discretionary hyphen */
   175     "&reg;",	"&#174;",    "(R) ", /* registered sign = registered trade mark sign */
   176     "&macr;",	"&#175;",    " ", /* macron = spacing macron = overline */
   177     "&deg;",	"&#176;",    " degrees", /* degree sign */
   178     "&plusmn;",	"&#177;",    "+-", /* plus-minus sign = plus-or-minus sign */
   179     "&sup2;",	"&#178;",    "2", /* superscript two = superscript digit two */
   180     "&sup3;",	"&#179;",    "3", /* superscript three = superscript digit three */
   181     "&acute;",	"&#180;",    " ", /* acute accent = spacing acute */
   182     "&micro;",	"&#181;",    "m", /* micro sign */
   183     "&para;",	"&#182;",    "--", /* pilcrow sign = paragraph sign */
   184     "&cedil;",	"&#184;",    " ", /* cedilla = spacing cedilla */
   185     "&sup1;",	"&#185;",    "1", /* superscript one = superscript digit one */
   186     "&ordm;",	"&#186;",    " ", /* masculine ordinal indicator */
   187     "&raquo;",	"&#187;",    "\"", /* right-pointing double angle quotation mark */
   188     "&frac14;",	"&#188;",    "1/4", /* vulgar fraction one quarter */
   189     "&frac12;",	"&#189;",    "1/2", /* vulgar fraction one half */
   190     "&frac34;",	"&#190;",    "3/4", /* vulgar fraction three quarters */
   191     "&iquest;",	"&#191;",    "?", /* inverted question mark */
   192     "&Agrave;",	"&#192;",    "A", /* latin capital letter A with grave */
   193     "&Aacute;",	"&#193;",    "A", /* latin capital letter A with acute */
   194     "&Acirc;",	"&#194;",    "A", /* latin capital letter A with circumflex */
   195     "&Atilde;",	"&#195;",    "A", /* latin capital letter A with tilde */
   196     "&Auml;",	"&#196;",    "A", /* latin capital letter A with diaeresis */
   197     "&Aring;",	"&#197;",    "A", /* latin capital letter A with ring above */
   198     "&AElig;",	"&#198;",    "AE", /* latin capital letter AE */
   199     "&Ccedil;",	"&#199;",    "C", /* latin capital letter C with cedilla */
   200     "&Egrave;",	"&#200;",    "E", /* latin capital letter E with grave */
   201     "&Eacute;",	"&#201;",    "E", /* latin capital letter E with acute */
   202     "&Ecirc;",	"&#202;",    "E", /* latin capital letter E with circumflex */
   203     "&Euml;",	"&#203;",    "E", /* latin capital letter E with diaeresis */
   204     "&Igrave;",	"&#204;",    "I", /* latin capital letter I with grave */
   205     "&Iacute;",	"&#205;",    "I", /* latin capital letter I with acute */
   206     "&Icirc;",	"&#206;",    "I", /* latin capital letter I with circumflex */
   207     "&Iuml;",	"&#207;",    "I", /* latin capital letter I with diaeresis */
   208     "&ETH;",	"&#208;",    "E", /* latin capital letter ETH */
   209     "&Ntilde;",	"&#209;",    "N", /* latin capital letter N with tilde */
   210     "&Ograve;",	"&#210;",    "O", /* latin capital letter O with grave */
   211     "&Oacute;",	"&#211;",    "O", /* latin capital letter O with acute */
   212     "&Ocirc;",	"&#212;",    "O", /* latin capital letter O with circumflex */
   213     "&Otilde;",	"&#213;",    "O", /* latin capital letter O with tilde */
   214     "&Ouml;",	"&#214;",    "O", /* latin capital letter O with diaeresis */
   215     "&times;",	"&#215;",    "*", /* multiplication sign */
   216     "&Oslash;",	"&#216;",    "O", /* latin capital letter O with stroke */
   217     "&Ugrave;",	"&#217;",    "U", /* latin capital letter U with grave */
   218     "&Uacute;",	"&#218;",    "U", /* latin capital letter U with acute */
   219     "&Ucirc;",	"&#219;",    "U", /* latin capital letter U with circumflex */
   220     "&Uuml;",	"&#220;",    "U", /* latin capital letter U with diaeresis */
   221     "&Yacute;",	"&#221;",    "Y", /* latin capital letter Y with acute */
   222     "&THORN;",	"&#222;",    "TH", /* latin capital letter THORN */
   223     "&szlig;",	"&#223;",    "sz", /* latin small letter sharp s = ess-zed */
   224     "&agrave;",	"&#224;",    "a", /* latin small letter a with grave */
   225     "&aacute;",	"&#225;",    "a", /* latin small letter a with acute */
   226     "&acirc;",	"&#226;",    "a", /* latin small letter a with circumflex */
   227     "&atilde;",	"&#227;",    "a", /* latin small letter a with tilde */
   228     "&auml;",	"&#228;",    "a", /* latin small letter a with diaeresis */
   229     "&aring;",	"&#229;",    "a", /* latin small letter a with ring above */
   230     "&aelig;",	"&#230;",    "ae", /* latin small letter ae */
   231     "&ccedil;",	"&#231;",    "c", /* latin small letter c with cedilla */
   232     "&egrave;",	"&#232;",    "e", /* latin small letter e with grave */
   233     "&eacute;",	"&#233;",    "e", /* latin small letter e with acute */
   234     "&ecirc;",	"&#234;",    "e", /* latin small letter e with circumflex */
   235     "&euml;",	"&#235;",    "e", /* latin small letter e with diaeresis */
   236     "&igrave;",	"&#236;",    "i", /* latin small letter i with grave */
   237     "&iacute;",	"&#237;",    "i", /* latin small letter i with acute */
   238     "&icirc;",	"&#238;",    "i", /* latin small letter i with circumflex */
   239     "&iuml;",	"&#239;",    "i", /* latin small letter i with diaeresis */
   240     "&eth;",	"&#240;",    "eth", /* latin small letter eth */
   241     "&ntilde;",	"&#241;",    "n", /* latin small letter n with tilde */
   242     "&ograve;",	"&#242;",    "o", /* latin small letter o with grave */
   243     "&oacute;",	"&#243;",    "o", /* latin small letter o with acute */
   244     "&ocirc;",	"&#244;",    "o", /* latin small letter o with circumflex */
   245     "&otilde;",	"&#245;",    "o", /* latin small letter o with tilde */
   246     "&ouml;",	"&#246;",    "o", /* latin small letter o with diaeresis */
   247     "&divide;",	"&#247;",    "/", /* division sign */
   248     "&oslash;",	"&#248;",    "o", /* latin small letter o with stroke */
   249     "&ugrave;",	"&#249;",    "u", /* latin small letter u with grave */
   250     "&uacute;",	"&#250;",    "u", /* latin small letter u with acute */
   251     "&ucirc;",	"&#251;",    "u", /* latin small letter u with circumflex */
   252     "&uuml;",	"&#252;",    "u", /* latin small letter u with diaeresis */
   253     "&yacute;",	"&#253;",    "y", /* latin small letter y with acute */
   254     "&thorn;",	"&#254;",    "th", /* latin small letter thorn */
   255     "&yuml;",	"&#255;",    "y", /* latin small letter y with diaeresis */
   256     "", ""
   257 };
   258 
/*
 * special characters
 *
 * The first group is written as decimal ASCII codes (32 is space, 9 is
 * tab, 34 is the double quote, and so on); the second group uses
 * character constants.
 */
#define CHAR_SPACE        32
#define CHAR_TAB           9
#define CHAR_LF           10
#define CHAR_CR           13
#define CHAR_DQUOTE       34
#define CHAR_SQUOTE       39
#define CHAR_OPEN_SQUOTE  96
#define CHAR_TILDE       126
#define CHAR_ASTERISK     42
#define CHAR_FORESLASH    47
#define CHAR_CARAT        94

#define CHAR_UNDERSCORE    '_'
#define CHAR_OPEN_CBRACK   '{'
#define CHAR_CLOSE_CBRACK  '}'
#define CHAR_OPEN_RBRACK   '('
#define CHAR_CLOSE_RBRACK  ')'
#define CHAR_OPEN_SBRACK   '['
#define CHAR_CLOSE_SBRACK  ']'
   279 
   280 /* longest and shortest normal PG line lengths */
   281 #define LONGEST_PG_LINE   75
   282 #define WAY_TOO_LONG      80
   283 #define SHORTEST_PG_LINE  55
   284 
   285 #define SWITCHES "ESTPXLOYHWVMUD" /* switches:-                            */
   286                                   /*     D - ignore DP-specific markup     */
   287                                   /*     E - echo queried line             */
   288                                   /*     S - check single quotes           */
   289                                   /*     T - check common typos            */
   290                                   /*     P - require closure of quotes on  */
   291                                   /*         every paragraph               */
   292                                   /*     X - "Trust no one" :-) Paranoid!  */
   293                                   /*         Queries everything            */
   294                                   /*     L - line end checking defaults on */
   295                                   /*         -L turns it off               */
   296                                   /*     O - overview. Just shows counts.  */
   297                                   /*     Y - puts errors to stdout         */
   298                                   /*         instead of stderr             */
   299                                   /*     H - Echoes header fields          */
   300                                   /*     M - Ignore markup in < >          */
   301                                   /*     U - Use file of User-defined Typos*/
   302                                   /*     W - Defaults for use on Web upload*/
   303                                   /*     V - Verbose - list EVERYTHING!    */
   304 #define SWITNO 14                 /* max number of switch parms            */
   305                                   /*        - used for defining array-size */
   306 #define MINARGS   1               /* minimum no of args excl switches      */
   307 #define MAXARGS   1               /* maximum no of args excl switches      */
   308 
   309 int pswit[SWITNO];                /* program switches set by SWITCHES      */
   310 
/*
 * Indices into pswit[]. Each value is the position of the switch's letter
 * within SWITCHES (ECHO_SWITCH 0 is 'E', ... DP_SWITCH 13 is 'D'): main()
 * sets pswit[i] when a command-line letter equals SWITCHES[i], so these
 * values and the order of the letters in SWITCHES must be kept in step.
 */
#define ECHO_SWITCH      0
#define SQUOTE_SWITCH    1
#define TYPO_SWITCH      2
#define QPARA_SWITCH     3
#define PARANOID_SWITCH  4
#define LINE_END_SWITCH  5
#define OVERVIEW_SWITCH  6
#define STDOUT_SWITCH    7
#define HEADER_SWITCH    8
#define WEB_SWITCH       9
#define VERBOSE_SWITCH   10
#define MARKUP_SWITCH    11
#define USERTYPO_SWITCH  12
#define DP_SWITCH        13
   325 
/*
 * Query and line counters.
 * main() sets every cnt_* counter to zero before calling procfile(), and
 * in overview mode prints each non-zero one and their total; cnt_spacend
 * is neither printed nor included in that total. linecnt and
 * checked_linecnt are not reset by main(); they are reported in the
 * "Checked ... lines" summary line.
 */
long cnt_dquot;       /* for overview mode, count of doublequote queries */
long cnt_squot;       /* for overview mode, count of singlequote queries */
long cnt_brack;       /* for overview mode, count of brackets queries */
long cnt_bin;         /* for overview mode, count of non-ASCII queries */
long cnt_odd;         /* for overview mode, count of odd character queries */
long cnt_long;        /* for overview mode, count of long line errors */
long cnt_short;       /* for overview mode, count of short line queries */
long cnt_punct;       /* for overview mode, count of punctuation and spacing queries */
long cnt_dash;        /* for overview mode, count of dash-related queries */
long cnt_word;        /* for overview mode, count of word queries */
long cnt_html;        /* for overview mode, count of html queries */
long cnt_lineend;     /* for overview mode, count of line-end queries */
long cnt_spacend;     /* count of lines with space at end */
long linecnt;         /* count of total lines in the file */
long checked_linecnt; /* count of lines actually checked */
   341 
   342 void proghelp(void);
   343 void procfile(char *);
   344 
   345 #define LOW_THRESHOLD    0
   346 #define HIGH_THRESHOLD   1
   347 
   348 #define START 0
   349 #define END 1
   350 #define PREV 0
   351 #define NEXT 1
   352 #define FIRST_OF_PAIR 0
   353 #define SECOND_OF_PAIR 1
   354 
   355 #define MAX_WORDPAIR 1000
   356 
   357 char running_from[MAX_PATH];
   358 
   359 int mixdigit(char *);
   360 char *getaword(char *,char *);
   361 int matchword(char *,char *);
   362 char *flgets(char *,int,FILE *,long);
   363 void lowerit(char *);
   364 int gcisalpha(unsigned char);
   365 int gcisdigit(unsigned char);
   366 int gcisletter(unsigned char);
   367 char *gcstrchr(char *s,char c);
   368 void postprocess_for_HTML(char *);
   369 char *linehasmarkup(char *);
   370 char *losemarkup(char *);
   371 int tagcomp(char *,char *);
   372 char *loseentities(char *);
   373 int isroman(char *);
   374 int usertypo_count;
   375 void postprocess_for_DP(char *);
   376 
   377 char wrk[LINEBUFSIZE];
   378 
   379 #define MAX_QWORD 50
   380 #define MAX_QWORD_LENGTH 40
   381 char qword[MAX_QWORD][MAX_QWORD_LENGTH];
   382 char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
   383 signed int dupcnt[MAX_QWORD];
   384 
   385 int main(int argc,char **argv)
   386 {
   387     char *argsw,*s;
   388     int i,switno,invarg;
   389     char usertypo_file[MAX_PATH];
   390     FILE *usertypofile;
   391     if (strlen(argv[0])<sizeof(running_from))
   392 	/* save the path to the executable */
   393         strcpy(running_from,argv[0]);
   394     /* find out what directory we're running from */
   395     s=running_from+strlen(running_from);
   396     for (;*s!='/' && *s!='\\' && s>=running_from;s--)
   397         *s=0;
   398     switno=strlen(SWITCHES);
   399     for (i=switno;--i>0;)
   400         pswit[i]=0;           /* initialise switches */
   401     /*
   402      * Standard loop to extract switches.
   403      * When we come out of this loop, the arguments will be
   404      * in argv[0] upwards and the switches used will be
   405      * represented by their equivalent elements in pswit[]
   406      */
   407     while (--argc>0 && **++argv=='-')
   408         for (argsw=argv[0]+1;*argsw!='\0';argsw++)
   409             for (i=switno,invarg=1;(--i>=0) && invarg==1;)
   410                 if ((toupper(*argsw))==SWITCHES[i])
   411 		{
   412                     invarg=0;
   413                     pswit[i]=1;
   414 		}
   415     /* Paranoid checking is turned OFF, not on, by its switch */
   416     pswit[PARANOID_SWITCH]^=1;
   417     if (pswit[PARANOID_SWITCH])
   418 	/* if running in paranoid mode force typo checks as well   */
   419         pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1;
   420     /* Line-end checking is turned OFF, not on, by its switch */
   421     pswit[LINE_END_SWITCH]^=1;
   422     /* Echoing is turned OFF, not on, by its switch */
   423     pswit[ECHO_SWITCH]^=1;
   424     if (pswit[OVERVIEW_SWITCH])
   425 	/* just print summary; don't echo */
   426         pswit[ECHO_SWITCH]=0;
   427     /*
   428      * Web uploads - for the moment, this is really just a placeholder
   429      * until we decide what processing we really want to do on web uploads
   430      */
   431     if (pswit[WEB_SWITCH])
   432     {
   433 	/* specific override for web uploads */
   434         pswit[ECHO_SWITCH]=1;
   435         pswit[SQUOTE_SWITCH]=0;
   436         pswit[TYPO_SWITCH]=1;
   437         pswit[QPARA_SWITCH]=0;
   438         pswit[PARANOID_SWITCH]=1;
   439         pswit[LINE_END_SWITCH]=0;
   440         pswit[OVERVIEW_SWITCH]=0;
   441         pswit[STDOUT_SWITCH]=0;
   442         pswit[HEADER_SWITCH]=1;
   443         pswit[VERBOSE_SWITCH]=0;
   444         pswit[MARKUP_SWITCH]=0;
   445         pswit[USERTYPO_SWITCH]=0;
   446         pswit[DP_SWITCH]=0;
   447     }
   448     if (argc<MINARGS || argc>MAXARGS)
   449     {
   450 	/* check number of args */
   451         proghelp();
   452         return 1;
   453     }
   454     /* read in the user-defined stealth scanno list */
   455     if (pswit[USERTYPO_SWITCH])
   456     {
   457 	/* ... we were told we had one! */
   458         usertypofile=fopen(USERTYPO_FILE,"rb");
   459         if (!usertypofile)
   460 	{
   461 	    /* not in cwd. try excuteable directory. */
   462             strcpy(usertypo_file,running_from);
   463             strcat(usertypo_file,USERTYPO_FILE);
   464             usertypofile=fopen(usertypo_file,"rb");
   465             if (!usertypofile) {
   466 		/* we ain't got no user typo file! */
   467                 printf("   --> I couldn't find gutcheck.typ "
   468 		  "-- proceeding without user typos.\n");
   469 	    }
   470 	}
   471         usertypo_count=0;
   472         if (usertypofile)
   473 	{
   474 	    /* we managed to open a User Typo File! */
   475             if (pswit[USERTYPO_SWITCH])
   476 	    {
   477                 while (flgets(aline,LINEBUFSIZE-1,usertypofile,
   478 		  (long)usertypo_count))
   479 		{
   480                     if (strlen(aline)>1)
   481 		    {
   482                         if ((int)*aline>33)
   483 			{
   484                             s=malloc(strlen(aline)+1);
   485                             if (!s)
   486 			    {
   487                                 fprintf(stderr,"bookloupe: cannot get enough "
   488 				  "memory for user typo file!\n");
   489                                 exit(1);
   490 			    }
   491                             strcpy(s,aline);
   492                             usertypo[usertypo_count]=s;
   493                             usertypo_count++;
   494                             if (usertypo_count>=MAX_USER_TYPOS)
   495 			    {
   496                                 printf("   --> Only %d user-defined typos "
   497 				  "allowed: ignoring the rest\n",
   498 				  MAX_USER_TYPOS);
   499                                 break;
   500 			    }
   501 			}
   502 		    }
   503 		}
   504 	    }
   505             fclose(usertypofile);
   506 	}
   507     }
   508     fprintf(stderr,"bookloupe: Check and report on an e-text\n");
   509     cnt_dquot=cnt_squot=cnt_brack=cnt_bin=cnt_odd=cnt_long=
   510     cnt_short=cnt_punct=cnt_dash=cnt_word=cnt_html=cnt_lineend=
   511     cnt_spacend=0;
   512     procfile(argv[0]);
   513     if (pswit[OVERVIEW_SWITCH])
   514     {
   515 	printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
   516 	  checked_linecnt,linecnt,linecnt-checked_linecnt);
   517         printf("    --------------- Queries found --------------\n");
   518         if (cnt_long)
   519 	    printf("    Long lines:                    %14ld\n",cnt_long);
   520         if (cnt_short)
   521 	    printf("    Short lines:                   %14ld\n",cnt_short);
   522         if (cnt_lineend)
   523 	    printf("    Line-end problems:             %14ld\n",cnt_lineend);
   524         if (cnt_word)
   525 	    printf("    Common typos:                  %14ld\n",cnt_word);
   526         if (cnt_dquot)
   527 	    printf("    Unmatched quotes:              %14ld\n",cnt_dquot);
   528         if (cnt_squot)
   529 	    printf("    Unmatched SingleQuotes:        %14ld\n",cnt_squot);
   530         if (cnt_brack)
   531 	    printf("    Unmatched brackets:            %14ld\n",cnt_brack);
   532         if (cnt_bin)
   533 	    printf("    Non-ASCII characters:          %14ld\n",cnt_bin);
   534         if (cnt_odd)
   535 	    printf("    Proofing characters:           %14ld\n",cnt_odd);
   536         if (cnt_punct)
   537 	    printf("    Punctuation & spacing queries: %14ld\n",cnt_punct);
   538         if (cnt_dash)
   539 	    printf("    Non-standard dashes:           %14ld\n",cnt_dash);
   540         if (cnt_html)
   541 	    printf("    Possible HTML tags:            %14ld\n",cnt_html);
   542         printf("\n");
   543         printf("    TOTAL QUERIES                  %14ld\n",
   544           cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
   545           cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
   546     }
   547     return 0;
   548 }
   549 
   550 struct first_pass_results {
   551     long firstline,astline;
   552     long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma;
   553     long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit;
   554     long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash;
   555     signed int Dutchcount,Frenchcount;
   556 };
   557 
   558 /*
   559  * first_pass:
   560  *
   561  * Run a first pass - verify that it's a valid PG
   562  * file, decide whether to report some things that
   563  * occur many times in the text like long or short
   564  * lines, non-standard dashes, etc.
   565  */
   566 struct first_pass_results *first_pass(FILE *infile)
   567 {
   568     char laststart=CHAR_SPACE,*s;
   569     signed int i,llen;
   570     unsigned int lastlen=0,lastblen=0;
   571     long spline=0,nspline=0;
   572     static struct first_pass_results results={0};
   573     char inword[MAXWORDLEN]="";
   574     while (fgets(aline,LINEBUFSIZE-1,infile))
   575     {
   576         while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13)
   577 	    aline[strlen(aline)-1]=0;
   578         linecnt++;
   579         if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") &&
   580 	  (strstr(aline,"PUBLIC DOMAIN") || strstr(aline,"COPYRIGHT")))
   581 	{
   582             if (spline)
   583                 printf("   --> Duplicate header?\n");
   584             spline=linecnt+1;   /* first line of non-header text, that is */
   585 	}
   586         if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG"))
   587 	{
   588             if (nspline)
   589                 printf("   --> Duplicate header?\n");
   590             nspline=linecnt+1;   /* first line of non-header text, that is */
   591 	}
   592         if (spline || nspline)
   593 	{
   594             lowerit(aline);
   595             if (strstr(aline,"end") && strstr(aline,"project gutenberg"))
   596 	    {
   597                 if (strstr(aline,"end")<strstr(aline,"project gutenberg"))
   598 		{
   599                     if (results.footerline)
   600 		    {
   601 			/* it's an old-form header - we can detect duplicates */
   602                         if (!nspline)
   603                             printf("   --> Duplicate footer?\n");
   604 		    }
   605                     else
   606                         results.footerline=linecnt;
   607 		}
   608 	    }
   609 	}
   610         if (spline)
   611 	    results.firstline=spline;
   612         if (nspline)
   613 	    results.firstline=nspline;  /* override with new */
   614         if (results.footerline)
   615 	    continue;    /* don't count the boilerplate in the footer */
   616         llen=strlen(aline);
   617         results.totlen+=llen;
   618         for (i=0;i<llen;i++)
   619 	{
   620             if ((unsigned char)aline[i]>127)
   621 		results.binlen++;
   622             if (gcisalpha(aline[i]))
   623 		results.alphalen++;
   624             if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
   625 		results.endquote_count++;
   626 	}
   627         if (strlen(aline)>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE &&
   628 	  lastblen>2 && lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
   629 	    results.shortline++;
   630         if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE)
   631 	    cnt_spacend++;
   632         if (strstr(aline,".,"))
   633 	    results.dotcomma++;
   634         /* only count ast lines for ignoring purposes where there is */
   635         /* locase text on the line */
   636         if (strstr(aline,"*"))
   637 	{
   638             for (s=aline;*s;s++)
   639                 if (*s>='a' && *s<='z')
   640                     break;
   641              if (*s)
   642 		results.astline++;
   643 	}
   644         if (strstr(aline,"/"))
   645             results.fslashline++;
   646         for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
   647 	    ;
   648         if (aline[i]=='-' && aline[i-1]!='-')
   649 	    results.hyphens++;
   650         if (llen>LONGEST_PG_LINE)
   651 	    results.longline++;
   652         if (llen>WAY_TOO_LONG)
   653 	    results.verylongline++;
   654         if (strstr(aline,"<") && strstr(aline,">"))
   655 	{
   656             i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);
   657             if (i>0)
   658                 results.htmcount++;
   659             if (strstr(aline,"<i>"))
   660 		results.htmcount+=4; /* bonus marks! */
   661 	}
   662         /* Check for spaced em-dashes */
   663         if (strstr(aline,"--"))
   664 	{
   665             results.emdash++;
   666             if (*(strstr(aline,"--")-1)==CHAR_SPACE ||
   667                (*(strstr(aline,"--")+2)==CHAR_SPACE))
   668 		results.space_emdash++;
   669             if (*(strstr(aline,"--")-1)==CHAR_SPACE &&
   670                (*(strstr(aline,"--")+2)==CHAR_SPACE))
   671 		/* count of em-dashes with spaces both sides */
   672 		results.non_PG_space_emdash++;
   673             if (*(strstr(aline,"--")-1)!=CHAR_SPACE &&
   674                (*(strstr(aline,"--")+2)!=CHAR_SPACE))
   675 		/* count of PG-type em-dashes with no spaces */
   676 		results.PG_space_emdash++;
   677 	}
   678         for (s=aline;*s;)
   679 	{
   680             s=getaword(s,inword);
   681             if (!strcmp(inword,"hij") || !strcmp(inword,"niet")) 
   682                 results.Dutchcount++;
   683             if (!strcmp(inword,"dans") || !strcmp(inword,"avec")) 
   684                 results.Frenchcount++;
   685             if (!strcmp(inword,"0") || !strcmp(inword,"1")) 
   686                 results.standalone_digit++;
   687 	}
   688         /* Check for spaced dashes */
   689         if (strstr(aline," -") && *(strstr(aline," -")+2)!='-')
   690 	    results.spacedash++;
   691         lastblen=lastlen;
   692         lastlen=strlen(aline);
   693         laststart=aline[0];
   694     }
   695     return &results;
   696 }
   697 
   698 struct warnings {
   699     signed int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;
   700     signed int endquote,isDutch,isFrench;
   701 };
   702 
   703 /*
   704  * report_first_pass:
   705  *
   706  * Make some snap decisions based on the first pass results.
   707  */
   708 struct warnings *report_first_pass(struct first_pass_results *results)
   709 {
   710     static struct warnings warnings={0};
   711     if (cnt_spacend>0)
   712         printf("   --> %ld lines in this file have white space at end\n",
   713 	  cnt_spacend);
   714     warnings.dotcomma=1;
   715     if (results->dotcomma>5)
   716     {
   717         warnings.dotcomma=0;
   718         printf("   --> %ld lines in this file contain '.,'. "
   719 	  "Not reporting them.\n",results->dotcomma);
   720     }
   721     /*
   722      * If more than 50 lines, or one-tenth, are short,
   723      * don't bother reporting them.
   724      */
   725     warnings.shortline=1;
   726     if (results->shortline>50 || results->shortline*10>linecnt)
   727     {
   728         warnings.shortline=0;
   729         printf("   --> %ld lines in this file are short. "
   730 	  "Not reporting short lines.\n",results->shortline);
   731     }
   732     /*
   733      * If more than 50 lines, or one-tenth, are long,
   734      * don't bother reporting them.
   735      */
   736     warnings.longline=1;
   737     if (results->longline>50 || results->longline*10>linecnt)
   738     {
   739         warnings.longline=0;
   740         printf("   --> %ld lines in this file are long. "
   741 	  "Not reporting long lines.\n",results->longline);
   742     }
   743     /* If more than 10 lines contain asterisks, don't bother reporting them. */
   744     warnings.ast=1;
   745     if (results->astline>10)
   746     {
   747         warnings.ast=0;
   748         printf("   --> %ld lines in this file contain asterisks. "
   749 	  "Not reporting them.\n",results->astline);
   750     }
   751     /*
   752      * If more than 10 lines contain forward slashes,
   753      * don't bother reporting them.
   754      */
   755     warnings.fslash=1;
   756     if (results->fslashline>10)
   757     {
   758         warnings.fslash=0;
   759         printf("   --> %ld lines in this file contain forward slashes. "
   760 	  "Not reporting them.\n",results->fslashline);
   761     }
   762     /*
   763      * If more than 20 lines contain unpunctuated endquotes,
   764      * don't bother reporting them.
   765      */
   766     warnings.endquote=1;
   767     if (results->endquote_count>20)
   768     {
   769         warnings.endquote=0;
   770         printf("   --> %ld lines in this file contain unpunctuated endquotes. "
   771 	  "Not reporting them.\n",results->endquote_count);
   772     }
   773     /*
   774      * If more than 15 lines contain standalone digits,
   775      * don't bother reporting them.
   776      */
   777     warnings.digit=1;
   778     if (results->standalone_digit>10)
   779     {
   780         warnings.digit=0;
   781         printf("   --> %ld lines in this file contain standalone 0s and 1s. "
   782 	  "Not reporting them.\n",results->standalone_digit);
   783     }
   784     /*
   785      * If more than 20 lines contain hyphens at end,
   786      * don't bother reporting them.
   787      */
   788     warnings.hyphen=1;
   789     if (results->hyphens>20)
   790     {
   791         warnings.hyphen=0;
   792         printf("   --> %ld lines in this file have hyphens at end. "
   793 	  "Not reporting them.\n",results->hyphens);
   794     }
   795     if (results->htmcount>20 && !pswit[MARKUP_SWITCH])
   796     {
   797         printf("   --> Looks like this is HTML. Switching HTML mode ON.\n");
   798         pswit[MARKUP_SWITCH]=1;
   799     }
   800     if (results->verylongline>0)
   801         printf("   --> %ld lines in this file are VERY long!\n",
   802 	  results->verylongline);
   803     /*
   804      * If there are more non-PG spaced dashes than PG em-dashes,
   805      * assume it's deliberate.
   806      * Current PG guidelines say don't use them, but older texts do,
   807      * and some people insist on them whatever the guidelines say.
   808      */
   809     warnings.dash=1;
   810     if (results->spacedash+results->non_PG_space_emdash>
   811       results->PG_space_emdash)
   812     {
   813         warnings.dash=0;
   814         printf("   --> There are %ld spaced dashes and em-dashes. "
   815 	  "Not reporting them.\n",
   816 	  results->spacedash+results->non_PG_space_emdash);
   817     }
   818     /* If more than a quarter of characters are hi-bit, bug out. */
   819     warnings.bin=1;
   820     if (results->binlen*4>results->totlen)
   821     {
   822         printf("   --> This file does not appear to be ASCII. "
   823 	  "Terminating. Best of luck with it!\n");
   824         exit(1);
   825     }
   826     if (results->alphalen*4<results->totlen)
   827     {
   828         printf("   --> This file does not appear to be text. "
   829 	  "Terminating. Best of luck with it!\n");
   830         exit(1);
   831     }
   832     if (results->binlen*100>results->totlen || results->binlen>100)
   833     {
   834         printf("   --> There are a lot of foreign letters here. "
   835 	  "Not reporting them.\n");
   836         warnings.bin=0;
   837     }
   838     warnings.isDutch=0;
   839     if (results->Dutchcount>50)
   840     {
   841         warnings.isDutch=1;
   842         printf("   --> This looks like Dutch - "
   843 	  "switching off dashes and warnings for 's Middags case.\n");
   844     }
   845     warnings.isFrench=0;
   846     if (results->Frenchcount>50)
   847     {
   848         warnings.isFrench=1;
   849         printf("   --> This looks like French - "
   850 	  "switching off some doublepunct.\n");
   851     }
   852     if (results->firstline && results->footerline)
   853         printf("    The PG header and footer appear to be already on.\n");
   854     else
   855     {
   856         if (results->firstline)
   857             printf("    The PG header is on - no footer.\n");
   858         if (results->footerline)
   859             printf("    The PG footer is on - no header.\n");
   860     }
   861     printf("\n");
   862     if (pswit[VERBOSE_SWITCH])
   863     {
   864         warnings.bin=1;
   865         warnings.shortline=1;
   866         warnings.dotcomma=1;
   867         warnings.longline=1;
   868         warnings.dash=1;
   869         warnings.digit=1;
   870         warnings.ast=1;
   871         warnings.fslash=1;
   872         warnings.hyphen=1;
   873         warnings.endquote=1;
   874         printf("   *** Verbose output is ON -- you asked for it! ***\n");
   875     }
   876     if (warnings.isDutch)
   877         warnings.dash=0;
   878     if (results->footerline>0 && results->firstline>0 &&
   879       results->footerline>results->firstline &&
   880       results->footerline-results->firstline<100)
   881     {
   882         printf("   --> I don't really know where this text starts. \n");
   883         printf("       There are no reference points.\n");
   884         printf("       I'm going to have to report the header and footer "
   885 	  "as well.\n");
   886         results->firstline=0;
   887     }
   888     return &warnings;
   889 }
   890 
   891 /*
   892  * procfile:
   893  *
   894  * Process one file.
   895  */
   896 void procfile(char *filename)
   897 {
   898     char *s,*t,*s1,laststart,*wordstart;
   899     char inword[MAXWORDLEN],testword[MAXWORDLEN];
   900     char parastart[81];     /* first line of current para */
   901     FILE *infile;
   902     struct first_pass_results *first_pass_results;
   903     struct warnings *warnings;
   904     long quot,squot,start_para_line;
   905     signed int i,j,llen,isemptyline,isacro,isellipsis,istypo,alower,
   906       eNon_A,eTab,eTilde,eAst,eFSlash,eCarat;
   907     unsigned int lastlen,lastblen;
   908     signed int s_brack,c_brack,r_brack,c_unders;
   909     signed int open_single_quote,close_single_quote,guessquote,dquotepar,
   910       squotepar;
   911     signed int isnewpara,vowel,consonant;
   912     char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],
   913       cbrack_err[80],unders_err[80];
   914     signed int qword_index,qperiod_index,isdup;
   915     signed int enddash;
   916     laststart=CHAR_SPACE;
   917     lastlen=lastblen=0;
   918     *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=
   919       *unders_err=*prevline=0;
   920     linecnt=checked_linecnt=start_para_line=0;
   921     quot=squot=s_brack=c_brack=r_brack=c_unders=0;
   922     i=llen=isemptyline=isacro=isellipsis=istypo=0;
   923     isnewpara=vowel=consonant=enddash=0;
   924     qword_index=qperiod_index=isdup=0;
   925     *inword=*testword=0;
   926     open_single_quote=close_single_quote=guessquote=dquotepar=squotepar=0;
   927     for (j=0;j<MAX_QWORD;j++)
   928     {
   929         dupcnt[j]=0;
   930         for (i=0;i<MAX_QWORD_LENGTH;i++)
   931 	{
   932             qword[i][j]=0;
   933             qperiod[i][j]=0;
   934 	}
   935     }
   936     infile=fopen(filename,"rb");
   937     if (!infile)
   938     {
   939         if (pswit[STDOUT_SWITCH])
   940             fprintf(stdout,"bookloupe: cannot open %s\n",filename);
   941         else
   942             fprintf(stderr,"bookloupe: cannot open %s\n",filename);
   943 	exit(1);
   944     }
   945     fprintf(stdout,"\n\nFile: %s\n\n",filename);
   946     first_pass_results=first_pass(infile);
   947     warnings=report_first_pass(first_pass_results);
   948     rewind(infile);
   949     /*
   950      * Here we go with the main pass. Hold onto yer hat!
   951      * Re-init some variables we've dirtied.
   952      */
   953     quot=squot=linecnt=0;
   954     laststart=CHAR_SPACE;
   955     lastlen=lastblen=0;
   956     while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
   957     {
   958         linecnt++;
   959         if (linecnt==1)
   960 	    isnewpara=1;
   961         if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11))
   962 	    continue;    // skip DP page separators completely
   963         if (linecnt<first_pass_results->firstline ||
   964 	  (first_pass_results->footerline>0 &&
   965 	  linecnt>first_pass_results->footerline))
   966 	{
   967             if (pswit[HEADER_SWITCH])
   968 	    {
   969                 if (!strncmp(aline,"Title:",6))
   970                     printf("    %s\n",aline);
   971                 if (!strncmp(aline,"Author:",7))
   972                     printf("    %s\n",aline);
   973                 if (!strncmp(aline,"Release Date:",13))
   974                     printf("    %s\n",aline);
   975                 if (!strncmp(aline,"Edition:",8))
   976                     printf("    %s\n\n",aline);
   977 	    }
   978             continue;                /* skip through the header */
   979 	}
   980         checked_linecnt++;
   981         s=aline;
   982         isemptyline=1;    /* assume the line is empty until proven otherwise */
   983         /*
   984 	 * If we are in a state of unbalanced quotes, and this line
   985          * doesn't begin with a quote, output the stored error message.
   986          * If the -P switch was used, print the warning even if the
   987          * new para starts with quotes.
   988 	 */
   989         t=s;
   990         while (*t==' ')
   991 	    t++;
   992         if (*dquote_err)
   993             if (*t!=CHAR_DQUOTE || pswit[QPARA_SWITCH])
   994 	    {
   995                 if (!pswit[OVERVIEW_SWITCH])
   996 		{
   997                     if (pswit[ECHO_SWITCH])
   998 			printf("\n%s\n",parastart);
   999                     printf(dquote_err);
  1000 		}
  1001                 else
  1002                     cnt_dquot++;
  1003             }
  1004         if (*squote_err)
  1005 	{
  1006             if (*t!=CHAR_SQUOTE && *t!=CHAR_OPEN_SQUOTE ||
  1007 	      pswit[QPARA_SWITCH] || squot)
  1008 	    {
  1009                 if (!pswit[OVERVIEW_SWITCH])
  1010 		{
  1011                     if (pswit[ECHO_SWITCH])
  1012 			printf("\n%s\n",parastart);
  1013                     printf(squote_err);
  1014 		}
  1015                 else
  1016                     cnt_squot++;
  1017 	    }
  1018             squot=0;
  1019 	}
  1020         if (*rbrack_err)
  1021 	{
  1022             if (!pswit[OVERVIEW_SWITCH])
  1023 	    {
  1024                 if (pswit[ECHO_SWITCH])
  1025 		    printf("\n%s\n",parastart);
  1026                 printf(rbrack_err);
  1027 	    }
  1028             else
  1029                 cnt_brack++;
  1030 	}
  1031         if (*sbrack_err)
  1032 	{
  1033             if (!pswit[OVERVIEW_SWITCH])
  1034 	    {
  1035                 if (pswit[ECHO_SWITCH])
  1036 		    printf("\n%s\n",parastart);
  1037                 printf(sbrack_err);
  1038 	    }
  1039             else
  1040                 cnt_brack++;
  1041 	}
  1042         if (*cbrack_err)
  1043 	{
  1044             if (!pswit[OVERVIEW_SWITCH])
  1045 	    {
  1046                 if (pswit[ECHO_SWITCH])
  1047 		    printf("\n%s\n",parastart);
  1048                 printf(cbrack_err);
  1049 	    }
  1050             else
  1051                 cnt_brack++;
  1052 	}
  1053         if (*unders_err)
  1054 	{
  1055             if (!pswit[OVERVIEW_SWITCH])
  1056 	    {
  1057                 if (pswit[ECHO_SWITCH])
  1058 		    printf("\n%s\n",parastart);
  1059                 printf(unders_err);
  1060 	    }
  1061             else
  1062                 cnt_brack++;
  1063 	}
  1064         *dquote_err=*squote_err=*rbrack_err=*cbrack_err= 
  1065 	  *sbrack_err=*unders_err=0;
  1066 	/*
  1067          * Look along the line, accumulate the count of quotes, and see
  1068          * if this is an empty line - i.e. a line with nothing on it
  1069          * but spaces.
  1070          * If line has just spaces, period, * and/or - on it, don't
  1071          * count it, since empty lines with asterisks or dashes to
  1072          * separate sections are common.
  1073 	 */
  1074         s=aline;
  1075         while (*s)
  1076 	{
  1077             if (*s==CHAR_DQUOTE)
  1078 		quot++;
  1079             if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)
  1080 	    {
  1081                 if (s==aline)
  1082 		{
  1083 		    /*
  1084 		     * At start of line, it can only be an openquote.
  1085 		     * Hardcode a very common exception!
  1086 		     */
  1087                     if (strncmp(s+2,"tis",3) && strncmp(s+2,"Tis",3))
  1088                         open_single_quote++;
  1089 		}
  1090                 else if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
  1091 		    /* Do nothing! it's definitely an apostrophe, not a quote */
  1092 		    ;
  1093 		/* it's outside a word - let's check it out */
  1094 		else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(*(s+1)))
  1095 		{
  1096 		    /* it damwell better BE an openquote */
  1097 		    if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
  1098 			/* hardcode a very common exception! */
  1099 			open_single_quote++;
  1100 		}
  1101 		else
  1102 		{
  1103 		    /* now - is it a closequote? */
  1104 		    guessquote=0;   /* accumulate clues */
  1105 		    if (gcisalpha(s[-1]))
  1106 		    {
  1107 			/* it follows a letter - could be either */
  1108 			guessquote+=1;
  1109 			if (s[-1]=='s')
  1110 			{
  1111 			    /* looks like a plural apostrophe */
  1112 			    guessquote-=3;
  1113 			    if (s[1]==CHAR_SPACE)  /* bonus marks! */
  1114 				guessquote-=2;
  1115 			}
  1116 		    }
  1117 		    /* it doesn't have a letter either side */
  1118 		    else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))
  1119 			guessquote+=8; /* looks like a closequote */
  1120 		    else
  1121 			guessquote++;
  1122 		    if (open_single_quote>close_single_quote)
  1123 			/*
  1124 			 * Give it the benefit of some doubt,
  1125 			 * if a squote is already open.
  1126 			 */
  1127 			guessquote++;
  1128 		    else
  1129 			guessquote--;
  1130 		    if (guessquote>=0)
  1131 			close_single_quote++;
  1132 		}
  1133 	    }
  1134 	    if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
  1135 	      *s!=13 && *s!=10)
  1136 		isemptyline=0;  /* ignore lines like  *  *  *  as spacers */
  1137 	    if (*s==CHAR_UNDERSCORE)
  1138 		c_unders++;
  1139 	    if (*s==CHAR_OPEN_CBRACK)
  1140 		c_brack++;
  1141 	    if (*s==CHAR_CLOSE_CBRACK)
  1142 		c_brack--;
  1143 	    if (*s==CHAR_OPEN_RBRACK)
  1144 		r_brack++;
  1145 	    if (*s==CHAR_CLOSE_RBRACK)
  1146 		r_brack--;
  1147 	    if (*s==CHAR_OPEN_SBRACK)
  1148 		s_brack++;
  1149 	    if (*s==CHAR_CLOSE_SBRACK)
  1150 		s_brack--;
  1151 	    s++;
  1152 	}
  1153         if (isnewpara && !isemptyline)
  1154 	{
  1155 	    /* This line is the start of a new paragraph. */
  1156             start_para_line=linecnt;
  1157 	    /* Capture its first line in case we want to report it later. */
  1158             strncpy(parastart,aline,80);
  1159             parastart[79]=0;
  1160             dquotepar=squotepar=0; /* restart the quote count */
  1161             s=aline;
  1162             while (!gcisalpha(*s) && !gcisdigit(*s) && *s)
  1163 		s++;
  1164             if (*s>='a' && *s<='z')
  1165 	    {
  1166 		/* and its first letter is lowercase */
  1167                 if (pswit[ECHO_SWITCH])
  1168 		    printf("\n%s\n",aline);
  1169                 if (!pswit[OVERVIEW_SWITCH])
  1170                     printf("    Line %ld column %d - "
  1171 		      "Paragraph starts with lower-case\n",
  1172 		      linecnt,(int)(s-aline)+1);
  1173                 else
  1174                     cnt_punct++;
  1175 	    }
  1176             isnewpara=0; /* Signal the end of new para processing. */
  1177 	}
  1178         /* Check for an em-dash broken at line end. */
  1179         if (enddash && *aline=='-')
  1180 	{
  1181             if (pswit[ECHO_SWITCH])
  1182 		printf("\n%s\n",aline);
  1183             if (!pswit[OVERVIEW_SWITCH])
  1184                 printf("    Line %ld column 1 - Broken em-dash?\n",linecnt);
  1185             else
  1186                 cnt_punct++;
  1187 	}
  1188         enddash=0;
  1189         for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--)
  1190 	    ;
  1191         if (s>=aline && *s=='-')
  1192             enddash=1;
  1193 	/*
  1194          * Check for invalid or questionable characters in the line
  1195          * Anything above 127 is invalid for plain ASCII, and
  1196          * non-printable control characters should also be flagged.
  1197          * Tabs should generally not be there.
  1198 	 */
  1199         for (s=aline;*s;s++)
  1200 	{
  1201             i=(unsigned char)*s;
  1202             if (i<CHAR_SPACE && i!=CHAR_LF && i!=CHAR_CR && i!=CHAR_TAB)
  1203 	    {
  1204                 if (pswit[ECHO_SWITCH])
  1205 		    printf("\n%s\n",aline);
  1206                 if (!pswit[OVERVIEW_SWITCH])
  1207                     printf("    Line %ld column %d - Control character %d\n",
  1208 		      linecnt,(int)(s-aline)+1,i);
  1209                 else
  1210                     cnt_bin++;
  1211 	    }
  1212 	}
  1213         if (warnings->bin)
  1214 	{
  1215 	    /* Don't repeat multiple warnings on one line. */
  1216             eNon_A=eTab=eTilde=eCarat=eFSlash=eAst=0;
  1217             for (s=aline;*s;s++)
  1218 	    {
  1219                 if (!eNon_A &&
  1220 		  (*s<CHAR_SPACE && *s!=9 && *s!='\n' || (unsigned char)*s>127))
  1221 		{
  1222                     i=*s;  /* annoying kludge for signed chars */
  1223                     if (i<0)
  1224 			i+=256;
  1225                     if (pswit[ECHO_SWITCH])
  1226 			printf("\n%s\n",aline);
  1227                     if (!pswit[OVERVIEW_SWITCH])
  1228                         if (i>127 && i<160)
  1229                             printf("    Line %ld column %d - "
  1230 			      "Non-ISO-8859 character %d\n",
  1231 			      linecnt,(int)(s-aline)+1,i);
  1232                         else
  1233                             printf("    Line %ld column %d - "
  1234 			      "Non-ASCII character %d\n",
  1235 			      linecnt,(int)(s-aline)+1,i);
  1236                     else
  1237                         cnt_bin++;
  1238                     eNon_A=1;
  1239 		}
  1240                 if (!eTab && *s==CHAR_TAB)
  1241 		{
  1242                     if (pswit[ECHO_SWITCH])
  1243 			printf("\n%s\n",aline);
  1244                     if (!pswit[OVERVIEW_SWITCH])
  1245                         printf("    Line %ld column %d - Tab character?\n",
  1246 			  linecnt,(int)(s-aline)+1);
  1247                     else
  1248                         cnt_odd++;
  1249                     eTab=1;
  1250 		}
  1251                 if (!eTilde && *s==CHAR_TILDE)
  1252 		{
  1253 		    /*
  1254 		     * Often used by OCR software to indicate an
  1255 		     * unrecognizable character.
  1256 		     */
  1257                     if (pswit[ECHO_SWITCH])
  1258 			printf("\n%s\n",aline);
  1259                     if (!pswit[OVERVIEW_SWITCH])
  1260                         printf("    Line %ld column %d - Tilde character?\n",
  1261 			  linecnt,(int)(s-aline)+1);
  1262                     else
  1263                         cnt_odd++;
  1264                     eTilde=1;
  1265 		}
  1266                 if (!eCarat && *s==CHAR_CARAT)
  1267 		{  
  1268                     if (pswit[ECHO_SWITCH])
  1269 			printf("\n%s\n",aline);
  1270                     if (!pswit[OVERVIEW_SWITCH])
  1271                         printf("    Line %ld column %d - Carat character?\n",
  1272 			  linecnt,(int)(s-aline)+1);
  1273                     else
  1274                         cnt_odd++;
  1275                     eCarat=1;
  1276 		}
  1277                 if (!eFSlash && *s==CHAR_FORESLASH && warnings->fslash)
  1278 		{  
  1279                     if (pswit[ECHO_SWITCH])
  1280 			printf("\n%s\n",aline);
  1281                     if (!pswit[OVERVIEW_SWITCH])
  1282                         printf("    Line %ld column %d - Forward slash?\n",
  1283 			  linecnt,(int)(s-aline)+1);
  1284                     else
  1285                         cnt_odd++;
  1286                     eFSlash=1;
  1287 		}
  1288                 /*
  1289 		 * Report asterisks only in paranoid mode,
  1290 		 * since they're often deliberate.
  1291 		 */
  1292                 if (!eAst && pswit[PARANOID_SWITCH] && warnings->ast &&
  1293 		  !isemptyline && *s==CHAR_ASTERISK)
  1294 		{
  1295                     if (pswit[ECHO_SWITCH])
  1296 			printf("\n%s\n",aline);
  1297                     if (!pswit[OVERVIEW_SWITCH])
  1298                         printf("    Line %ld column %d - Asterisk?\n",
  1299 			  linecnt,(int)(s-aline)+1);
  1300                     else
  1301                         cnt_odd++;
  1302                     eAst=1;
  1303 		}
  1304 	    }
  1305 	}
  1306         /* Check for line too long. */
  1307         if (warnings->longline)
  1308 	{
  1309             if (strlen(aline)>LONGEST_PG_LINE)
  1310 	    {
  1311                 if (pswit[ECHO_SWITCH])
  1312 		    printf("\n%s\n",aline);
  1313                 if (!pswit[OVERVIEW_SWITCH])
  1314                     printf("    Line %ld column %d - Long line %d\n",
  1315 		      linecnt,strlen(aline),strlen(aline));
  1316                 else
  1317                     cnt_long++;
  1318 	    }
  1319 	}
  1320         /*
  1321 	 * Check for line too short.
  1322          * This one is a bit trickier to implement: we don't want to
  1323          * flag the last line of a paragraph for being short, so we
  1324          * have to wait until we know that our current line is a
  1325          * "normal" line, then report the _previous_ line if it was too
  1326          * short. We also don't want to report indented lines like
  1327          * chapter heads or formatted quotations. We therefore keep
  1328          * lastlen as the length of the last line examined, and
  1329          * lastblen as the length of the last but one, and try to
  1330          * suppress unnecessary warnings by checking that both were of
  1331          * "normal" length. We keep the first character of the last
  1332          * line in laststart, and if it was a space, we assume that the
  1333          * formatting is deliberate. I can't figure out a way to
  1334          * distinguish something like a quoted verse left-aligned or
  1335          * the header or footer of a letter from a paragraph of short
  1336          * lines - maybe if I examined the whole paragraph, and if the
  1337          * para has less than, say, 8 lines and if all lines are short,
  1338          * then just assume it's OK? Need to look at some texts to see
  1339          * how often a formula like this would get the right result.
  1340 	 */
  1341         if (warnings->shortline && strlen(aline)>1 && lastlen>1 &&
  1342 	  lastlen<SHORTEST_PG_LINE && lastblen>1 && lastblen>SHORTEST_PG_LINE &&
  1343 	  laststart!=CHAR_SPACE)
  1344 	{
  1345 	    if (pswit[ECHO_SWITCH])
  1346 		printf("\n%s\n",prevline);
  1347 	    if (!pswit[OVERVIEW_SWITCH])
  1348 		printf("    Line %ld column %d - Short line %d?\n",
  1349 		  linecnt-1,strlen(prevline),strlen(prevline));
  1350 	    else
  1351 		cnt_short++;
  1352 	}
  1353         lastblen=lastlen;
  1354         lastlen=strlen(aline);
  1355         laststart=aline[0];
  1356         /* Look for punctuation other than full ellipses at start of line. */
  1357         if (*aline && strchr(".?!,;:",aline[0]) && strncmp(". . .",aline,5))
  1358 	{
  1359 	    if (pswit[ECHO_SWITCH])
  1360 		printf("\n%s\n",aline);
  1361 	    if (!pswit[OVERVIEW_SWITCH])
  1362 		printf("    Line %ld column 1 - Begins with punctuation?\n",
  1363 		  linecnt);
  1364 	    else
  1365 		cnt_punct++;
  1366 	}
  1367         /*
  1368 	 * Check for spaced em-dashes.
  1369          * We must check _all_ occurrences of "--" on the line
  1370          * hence the loop - even if the first double-dash is OK
  1371          * there may be another that's wrong later on.
  1372 	 */
  1373         if (warnings->dash)
  1374 	{
  1375             s=aline;
  1376             while (strstr(s,"--"))
  1377 	    {
  1378                 if (*(strstr(s,"--")-1)==CHAR_SPACE ||
  1379                    (*(strstr(s,"--")+2)==CHAR_SPACE))
  1380 		{
  1381                     if (pswit[ECHO_SWITCH])
  1382 			printf("\n%s\n",aline);
  1383                     if (!pswit[OVERVIEW_SWITCH])
  1384                         printf("    Line %ld column %d - Spaced em-dash?\n",
  1385 			  linecnt,(int)(strstr(s,"--")-aline)+1);
  1386                     else
  1387                         cnt_dash++;
  1388 		}
  1389                 s=strstr(s,"--")+2;
  1390 	    }
  1391 	}
  1392         /* Check for spaced dashes. */
  1393         if (warnings->dash)
  1394 	{
  1395             if (strstr(aline," -"))
  1396 	    {
  1397                 if (*(strstr(aline," -")+2)!='-')
  1398 		{
  1399                     if (pswit[ECHO_SWITCH])
  1400 			printf("\n%s\n",aline);
  1401                     if (!pswit[OVERVIEW_SWITCH])
  1402                         printf("    Line %ld column %d - Spaced dash?\n",
  1403 			  linecnt,(int)(strstr(aline," -")-aline)+1);
  1404                     else
  1405                         cnt_dash++;
  1406 		}
  1407 	    }
  1408             else if (strstr(aline,"- "))
  1409 	    {
  1410 		if (*(strstr(aline,"- ")-1)!='-')
  1411 		{
  1412 		    if (pswit[ECHO_SWITCH])
  1413 			printf("\n%s\n",aline);
  1414 		    if (!pswit[OVERVIEW_SWITCH])
  1415 			printf("    Line %ld column %d - Spaced dash?\n",
  1416 			  linecnt,(int)(strstr(aline,"- ")-aline)+1);
  1417 		    else
  1418 			cnt_dash++;
  1419 		}
  1420 	    }
  1421 	}
  1422         /*
  1423 	 * Check for unmarked paragraphs indicated by separate speakers.
  1424          * May well be false positive:
  1425          * "Bravo!" "Wonderful!" called the crowd.
  1426          * but useful all the same.
  1427 	 */
  1428         s=wrk;
  1429         *s=0;
  1430         if (strstr(aline,"\" \""))
  1431 	    s=strstr(aline,"\" \"");
  1432         if (strstr(aline,"\"  \""))
  1433 	    s=strstr(aline,"\"  \"");
  1434         if (*s)
  1435 	{
  1436             if (pswit[ECHO_SWITCH])
  1437 		printf("\n%s\n",aline);
  1438             if (!pswit[OVERVIEW_SWITCH])
  1439                 printf("    Line %ld column %d - "
  1440 		  "Query missing paragraph break?\n",
  1441 		  linecnt,(int)(s-aline)+1);
  1442             else
  1443                 cnt_punct++;
  1444 	}
  1445         /*
  1446 	 * Check for "to he" and other easy he/be errors.
  1447          * This is a very inadequate effort on the he/be problem,
  1448          * but the phrase "to he" is almost always an error, whereas "to
  1449          * be" is quite common.
  1450          * Similarly, '"Quiet!", be said.' is a non-be error
  1451          * "to he" is _not_ always an error!:
  1452          *       "Where they went to he couldn't say."
  1453          * Another false positive:
  1454          *       What would "Cinderella" be without the . . .
  1455          * and another: "If he wants to he can see for himself."
  1456 	 */
  1457         s=wrk;
  1458         *s=0;
  1459         if (strstr(aline," to he "))
  1460 	    s=strstr(aline," to he ");
  1461         if (strstr(aline,"\" be "))
  1462 	    s=strstr(aline,"\" be ");
  1463         if (strstr(aline,"\", be "))
  1464 	    s=strstr(aline,"\", be ");
  1465         if (strstr(aline," is be "))
  1466 	    s=strstr(aline," is be ");
  1467         if (strstr(aline," be is "))
  1468 	    s=strstr(aline," be is ");
  1469         if (strstr(aline," was be "))
  1470 	    s=strstr(aline," was be ");
  1471         if (strstr(aline," be would "))
  1472 	    s=strstr(aline," be would ");
  1473         if (strstr(aline," be could "))
  1474 	    s=strstr(aline," be could ");
  1475         if (*s)
  1476 	{
  1477             if (pswit[ECHO_SWITCH])
  1478 		printf("\n%s\n",aline);
  1479             if (!pswit[OVERVIEW_SWITCH])
  1480                 printf("    Line %ld column %d - Query he/be error?\n",
  1481 		  linecnt,(int)(s-aline)+1);
  1482             else
  1483                 cnt_word++;
  1484 	}
  1485         s=wrk;
  1486         *s=0;
  1487         if (strstr(aline," i bad "))
  1488 	    s=strstr(aline," i bad ");
  1489         if (strstr(aline," you bad "))
  1490 	    s=strstr(aline," you bad ");
  1491         if (strstr(aline," he bad "))
  1492 	    s=strstr(aline," he bad ");
  1493         if (strstr(aline," she bad "))
  1494 	    s=strstr(aline," she bad ");
  1495         if (strstr(aline," they bad "))
  1496 	    s=strstr(aline," they bad ");
  1497         if (strstr(aline," a had "))
  1498 	    s=strstr(aline," a had ");
  1499         if (strstr(aline," the had "))
  1500 	    s=strstr(aline," the had ");
  1501         if (*s)
  1502 	{
  1503             if (pswit[ECHO_SWITCH])
  1504 		printf("\n%s\n",aline);
  1505             if (!pswit[OVERVIEW_SWITCH])
  1506                 printf("    Line %ld column %d - Query had/bad error?\n",
  1507 		  linecnt,(int)(s-aline)+1);
  1508             else
  1509                 cnt_word++;
  1510 	}
  1511         s=wrk;
  1512         *s=0;
  1513         if (strstr(aline,", hut "))
  1514 	    s=strstr(aline,", hut ");
  1515         if (strstr(aline,"; hut "))
  1516 	    s=strstr(aline,"; hut ");
  1517         if (*s)
  1518 	{
  1519             if (pswit[ECHO_SWITCH])
  1520 		printf("\n%s\n",aline);
  1521             if (!pswit[OVERVIEW_SWITCH])
  1522                 printf("    Line %ld column %d - Query hut/but error?\n",
  1523 		  linecnt,(int)(s-aline)+1);
  1524             else
  1525                 cnt_word++;
  1526 	}
  1527         /*
  1528 	 * Special case - angled bracket in front of "From" placed there by an
  1529 	 * MTA when sending an e-mail.
  1530 	 */
  1531         if (strstr(aline,">From"))
  1532 	{
  1533             if (pswit[ECHO_SWITCH])
  1534 		printf("\n%s\n",aline);
  1535             if (!pswit[OVERVIEW_SWITCH])
  1536                 printf("    Line %ld column %d - "
  1537 		  "Query angled bracket with From\n",
  1538 		  linecnt,(int)(strstr(aline,">From")-aline)+1);
  1539             else
  1540                 cnt_punct++;
  1541 	}
  1542         /*
  1543 	 * Check for a single character line -
  1544 	 * often an overflow from bad wrapping.
  1545 	 */
  1546         if (*aline && !aline[1])
  1547 	{
  1548             if (*aline=='I' || *aline=='V' || *aline=='X' || *aline=='L' ||
  1549 	      gcisdigit(*aline))
  1550                 ; /* Nothing - ignore numerals alone on a line. */
  1551             else
  1552 	    {
  1553                 if (pswit[ECHO_SWITCH])
  1554 		    printf("\n%s\n",aline);
  1555                 if (!pswit[OVERVIEW_SWITCH])
  1556                     printf("    Line %ld column 1 - "
  1557 		      "Query single character line\n",linecnt);
  1558                 else
  1559                     cnt_punct++;
  1560 	    }
  1561 	}
  1562         /* Check for I" - often should be ! */
  1563         if (strstr(aline," I\""))
  1564 	{
  1565             if (pswit[ECHO_SWITCH])
  1566 		printf("\n%s\n",aline);
  1567             if (!pswit[OVERVIEW_SWITCH])
  1568                 printf("    Line %ld column %ld - Query I=exclamation mark?\n",
  1569 		  linecnt,strstr(aline," I\"")-aline);
  1570             else
  1571                 cnt_punct++;
  1572 	}
  1573         /*
  1574 	 * Check for period without a capital letter. Cut-down from gutspell.
  1575          * Only works when it happens on a single line.
  1576 	 */
  1577         if (pswit[PARANOID_SWITCH])
  1578 	{
  1579             for (t=s=aline;strstr(t,". ");)
  1580 	    {
  1581                 t=strstr(t,". ");
  1582                 if (t==s)
  1583 		{
  1584                     t++;
  1585 		    /* start of line punctuation is handled elsewhere */
  1586                     continue;
  1587 		}
  1588                 if (!gcisalpha(t[-1]))
  1589 		{
  1590                     t++;
  1591                     continue;
  1592 		}
  1593                 if (warnings->isDutch)
  1594 		{
  1595 		    /* For Frank & Jeroen -- 's Middags case */
  1596                     if (t[2]==CHAR_SQUOTE && t[3]>='a' && t[3]<='z' &&
  1597 		      t[4]==CHAR_SPACE && t[5]>='A' && t[5]<='Z')
  1598 		    {
  1599                         t++;
  1600                         continue;
  1601 		    }
  1602 		}
  1603                 s1=t+2;
  1604                 while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))
  1605                     s1++;
  1606                 if (*s1>='a' && *s1<='z')
  1607 		{
  1608 		    /* we have something to investigate */
  1609                     istypo=1;
  1610 		    /* so let's go back and find out */
  1611                     for (s1=t-1;s1>=s &&
  1612 		      (gcisalpha(*s1) || gcisdigit(*s1) || *s1==CHAR_SQUOTE &&
  1613 		      gcisalpha(s1[1]) && gcisalpha(s1[-1]));s1--)
  1614 			;
  1615                     s1++;
  1616                     for (i=0;*s1 && *s1!='.';s1++,i++)
  1617                         testword[i]=*s1;
  1618                     testword[i]=0;
  1619                     for (i=0;*abbrev[i];i++)
  1620                         if (!strcmp(testword,abbrev[i]))
  1621                             istypo=0;
  1622                     if (gcisdigit(*testword))
  1623 			istypo=0;
  1624                     if (!testword[1])
  1625 			istypo=0;
  1626                     if (isroman(testword))
  1627 			istypo=0;
  1628                     if (istypo)
  1629 		    {
  1630                         istypo=0;
  1631                         for (i=0;testword[i];i++)
  1632                             if (strchr(vowels,testword[i]))
  1633                                 istypo=1;
  1634 		    }
  1635                     if (istypo)
  1636 		    {
  1637                         isdup=0;
  1638                         if (strlen(testword)<MAX_QWORD_LENGTH &&
  1639 			  !pswit[VERBOSE_SWITCH])
  1640                             for (i=0;i<qperiod_index;i++)
  1641                                 if (!strcmp(testword,qperiod[i]))
  1642                                     isdup=1;
  1643                         if (!isdup)
  1644 			{
  1645                             if (qperiod_index<MAX_QWORD &&
  1646 			      strlen(testword)<MAX_QWORD_LENGTH)
  1647 			    {
  1648                                 strcpy(qperiod[qperiod_index],testword);
  1649                                 qperiod_index++;
  1650 			    }
  1651                             if (pswit[ECHO_SWITCH])
  1652 				printf("\n%s\n",aline);
  1653                             if (!pswit[OVERVIEW_SWITCH])
  1654                                 printf("    Line %ld column %d - "
  1655 				  "Extra period?\n",linecnt,(int)(t-aline)+1);
  1656                             else
  1657                                 cnt_punct++;
  1658 			}
  1659 		    }
  1660 		}
  1661 	    t++;
  1662 	    }
  1663 	}
  1664         if (pswit[TYPO_SWITCH])
  1665 	{
  1666             /* Check for words usually not followed by punctuation. */
  1667             for (s=aline;*s;)
  1668 	    {
  1669                 wordstart=s;
  1670                 s=getaword(s,inword);
  1671                 if (!*inword)
  1672 		    continue;
  1673                 lowerit(inword);
  1674                 for (i=0;*nocomma[i];i++)
  1675                     if (!strcmp(inword,nocomma[i]))
  1676 		    {
  1677                         if (*s==',' || *s==';' || *s==':')
  1678 			{
  1679                             if (pswit[ECHO_SWITCH])
  1680 				printf("\n%s\n",aline);
  1681                             if (!pswit[OVERVIEW_SWITCH])
  1682                                 printf("    Line %ld column %d - "
  1683 				  "Query punctuation after %s?\n",
  1684 				  linecnt,(int)(s-aline)+1,inword);
  1685                             else
  1686                                 cnt_punct++;
  1687 			}
  1688 		    }
  1689 		for (i=0;*noperiod[i];i++)
  1690                     if (!strcmp(inword,noperiod[i]))
  1691 		    {
  1692                         if (*s=='.' || *s=='!')
  1693 			{
  1694                             if (pswit[ECHO_SWITCH])
  1695 				printf("\n%s\n",aline);
  1696                             if (!pswit[OVERVIEW_SWITCH])
  1697                                 printf("    Line %ld column %d - "
  1698 				  "Query punctuation after %s?\n",
  1699 				  linecnt,(int)(s-aline)+1,inword);
  1700                             else
  1701                                 cnt_punct++;
  1702 			}
  1703 		    }
  1704 	    }
  1705 	}
  1706         /*
  1707 	 * Check for commonly mistyped words,
  1708 	 * and digits like 0 for O in a word.
  1709 	 */
  1710         for (s=aline;*s;)
  1711 	{
  1712             wordstart=s;
  1713             s=getaword(s,inword);
  1714             if (!*inword)
  1715 		continue; /* don't bother with empty lines */
  1716             if (mixdigit(inword))
  1717 	    {
  1718                 if (pswit[ECHO_SWITCH])
  1719 		    printf("\n%s\n",aline);
  1720                 if (!pswit[OVERVIEW_SWITCH])
  1721                     printf("    Line %ld column %d - Query digit in %s\n",
  1722 		      linecnt,(int)(wordstart-aline)+1,inword);
  1723                 else
  1724                     cnt_word++;
  1725 	    }
  1726             /*
  1727 	     * Put the word through a series of tests for likely typos and OCR
  1728 	     * errors.
  1729 	     */
  1730             if (pswit[TYPO_SWITCH])
  1731 	    {
  1732                 istypo=0;
  1733                 strcpy(testword,inword);
  1734                 alower=0;
  1735                 for (i=0;i<(signed int)strlen(testword);i++)
  1736 		{
  1737 		    /* lowercase for testing */
  1738                     if (testword[i]>='a' && testword[i]<='z')
  1739 			alower=1;
  1740                     if (alower && testword[i]>='A' && testword[i]<='Z')
  1741 		    {
  1742                         /*
  1743 			 * We have an uppercase mid-word. However, there are
  1744 			 * common cases:
  1745                          *   Mac and Mc like McGill
  1746                          *   French contractions like l'Abbe
  1747 			 */
  1748                         if (i==2 && testword[0]=='m' && testword[1]=='c' ||
  1749                           i==3 && testword[0]=='m' && testword[1]=='a' &&
  1750 			  testword[2]=='c' || i>0 && testword[i-1]==CHAR_SQUOTE)
  1751 			    ; /* do nothing! */
  1752                         else
  1753                             istypo=1;
  1754 		    }
  1755                     testword[i]=(char)tolower(testword[i]);
  1756 		}
  1757                 /*
  1758 		 * Check for certain unlikely two-letter combinations at word
  1759 		 * start and end.
  1760 		 */
  1761                 if (strlen(testword)>1)
  1762 		{
  1763                     for (i=0;*nostart[i];i++)
  1764                         if (!strncmp(testword,nostart[i],2))
  1765                             istypo=1;
  1766                     for (i=0;*noend[i];i++)
  1767                         if (!strncmp(testword+strlen(testword)-2,noend[i],2))
  1768                             istypo=1;
  1769 		}
  1770                 /* ght is common, gbt never. Like that. */
  1771                 if (strstr(testword,"cb"))
  1772 		    istypo=1;
  1773                 if (strstr(testword,"gbt"))
  1774 		    istypo=1;
  1775                 if (strstr(testword,"pbt"))
  1776 		    istypo=1;
  1777                 if (strstr(testword,"tbs"))
  1778 		    istypo=1;
  1779                 if (strstr(testword,"mrn"))
  1780 		    istypo=1;
  1781                 if (strstr(testword,"ahle"))
  1782 		    istypo=1;
  1783                 if (strstr(testword,"ihle"))
  1784 		    istypo=1;
  1785                 /*
  1786 		 * "TBE" does happen - like HEARTBEAT - but uncommon.
  1787                  * Also "TBI" - frostbite, outbid - but uncommon.
  1788                  * Similarly "ii" like Hawaii, or Pompeii, and in Roman
  1789 		 * numerals, but "ii" is a common scanno.
  1790 		 */
  1791                 if (strstr(testword,"tbi"))
  1792 		    istypo=1;
  1793                 if (strstr(testword,"tbe"))
  1794 		    istypo=1;
  1795                 if (strstr(testword,"ii"))
  1796 		    istypo=1;
  1797                 /*
  1798 		 * Check for no vowels or no consonants.
  1799                  * If none, flag a typo.
  1800 		 */
  1801                 if (!istypo && strlen(testword)>1)
  1802 		{
  1803                     vowel=consonant=0;
  1804                     for (i=0;testword[i];i++)
  1805 		    {
  1806                         if (testword[i]=='y' || gcisdigit(testword[i]))
  1807 			{
  1808 			    /* Yah, this is loose. */
  1809                             vowel++;
  1810                             consonant++;
  1811 			}
  1812                         else if (strchr(vowels,testword[i]))
  1813 			    vowel++;
  1814 			else
  1815 			    consonant++;
  1816 		    }
  1817                     if (!vowel || !consonant)
  1818                         istypo=1;
  1819 		}
  1820                 /*
  1821 		 * Now exclude the word from being reported if it's in
  1822                  * the okword list.
  1823 		 */
  1824                 for (i=0;*okword[i];i++)
  1825                     if (!strcmp(testword,okword[i]))
  1826                         istypo=0;
  1827                 /*
  1828 		 * What looks like a typo may be a Roman numeral.
  1829 		 * Exclude these.
  1830 		 */
  1831                 if (istypo && isroman(testword))
  1832 		    istypo=0;
  1833                 /* Check the manual list of typos. */
  1834                 if (!istypo)
  1835                     for (i=0;*typo[i];i++)
  1836                         if (!strcmp(testword,typo[i]))
  1837                             istypo=1;
  1838                 /*
  1839 		 * Check lowercase s, l, i and m - special cases.
  1840                  *   "j" - often a semi-colon gone wrong.
  1841                  *   "d" for a missing apostrophe - he d
  1842                  *   "n" for "in"
  1843 		 */
  1844                 if (!istypo && strlen(testword)==1 && strchr("slmijdn",*inword))
  1845 		    istypo=1;
  1846                 if (istypo)
  1847 		{
  1848                     isdup=0;
  1849                     if (strlen(testword)<MAX_QWORD_LENGTH &&
  1850 		      !pswit[VERBOSE_SWITCH])
  1851                         for (i=0;i<qword_index;i++)
  1852                             if (!strcmp(testword,qword[i]))
  1853 			    {
  1854                                 isdup=1;
  1855                                 ++dupcnt[i];
  1856 			    }
  1857                     if (!isdup)
  1858 		    {
  1859                         if (qword_index<MAX_QWORD &&
  1860 			  strlen(testword)<MAX_QWORD_LENGTH)
  1861 			{
  1862                             strcpy(qword[qword_index],testword);
  1863                             qword_index++;
  1864 			}
  1865                         if (pswit[ECHO_SWITCH])
  1866 			    printf("\n%s\n",aline);
  1867                         if (!pswit[OVERVIEW_SWITCH])
  1868 			{
  1869                             printf("    Line %ld column %d - Query word %s",
  1870 			      linecnt,(int)(wordstart-aline)+1,inword);
  1871                             if (strlen(testword)<MAX_QWORD_LENGTH &&
  1872 			      !pswit[VERBOSE_SWITCH])
  1873                                 printf(" - not reporting duplicates");
  1874                             printf("\n");
  1875 			}
  1876                         else
  1877                             cnt_word++;
  1878 		    }
  1879 		}
  1880 	    }
  1881 	    /* check the user's list of typos */
  1882 	    if (!istypo && usertypo_count)
  1883 		for (i=0;i<usertypo_count;i++)
  1884 		    if (!strcmp(testword,usertypo[i]))
  1885 		    {
  1886 			if (pswit[ECHO_SWITCH])
  1887 			    printf("\n%s\n",aline);
  1888 			if (!pswit[OVERVIEW_SWITCH])  
  1889 			    printf("    Line %ld column %d - "
  1890 			      "Query possible scanno %s\n",
  1891 			      linecnt,(int)(wordstart-aline)+2,inword);
  1892 		    }
  1893             if (pswit[PARANOID_SWITCH] && warnings->digit)
  1894 	    {
  1895 		/* In paranoid mode, query all 0 and 1 standing alone. */
  1896                 if (!strcmp(inword,"0") || !strcmp(inword,"1"))
  1897 		{
  1898                     if (pswit[ECHO_SWITCH])
  1899 			printf("\n%s\n",aline);
  1900                     if (!pswit[OVERVIEW_SWITCH])
  1901                         printf("    Line %ld column %d - Query standalone %s\n",
  1902 			  linecnt,(int)(wordstart-aline)+2,inword);
  1903                     else
  1904                         cnt_word++;
  1905 		}
  1906 	    }
  1907 	}
  1908 	/*
  1909          * Look for added or missing spaces around punctuation and quotes.
  1910          * If there is a punctuation character like ! with no space on
  1911          * either side, suspect a missing!space. If there are spaces on
  1912          * both sides , assume a typo. If we see a double quote with no
  1913          * space or punctuation on either side of it, assume unspaced
  1914          * quotes "like"this.
  1915 	 */
  1916         llen=strlen(aline);
  1917         for (i=1;i<llen;i++)
  1918 	{
  1919 	    /* For each character in the line after the first. */
  1920             if (strchr(".?!,;:_",aline[i]))  /* if it's punctuation */
  1921 	    {
  1922 		/* we need to suppress warnings for acronyms like M.D. */
  1923                 isacro=0;
  1924 		/* we need to suppress warnings for ellipsis . . . */
  1925                 isellipsis=0;
  1926 		/* if there are letters on both sides of it or ... */
  1927                 if (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1]) ||
  1928                    gcisalpha(aline[i+1]) && strchr("?!,;:",aline[i]))
  1929 		{
  1930 		    /* ...if it's strict punctuation followed by an alpha */
  1931                     if (aline[i]=='.')
  1932 		    {
  1933                         if (i>2 && aline[i-2]=='.')
  1934 			    isacro=1;
  1935                         if (i+2<llen && aline[i+2]=='.')
  1936 			    isacro=1;
  1937 		    }
  1938                     if (!isacro)
  1939 		    {
  1940                         if (pswit[ECHO_SWITCH])
  1941 			    printf("\n%s\n",aline);
  1942                         if (!pswit[OVERVIEW_SWITCH])
  1943                             printf("    Line %ld column %d - Missing space?\n",
  1944 			      linecnt,i+1);
  1945                         else
  1946                             cnt_punct++;
  1947 		    }
  1948 		}
  1949                 if (aline[i-1]==CHAR_SPACE &&
  1950 		  (aline[i+1]==CHAR_SPACE || aline[i+1]==0))
  1951 		{
  1952 		    /*
  1953 		     * If there are spaces on both sides,
  1954 		     * or space before and end of line.
  1955 		     */
  1956                     if (aline[i]=='.')
  1957 		    {
  1958                         if (i>2 && aline[i-2]=='.')
  1959 			    isellipsis=1;
  1960                         if (i+2<llen && aline[i+2]=='.')
  1961 			    isellipsis=1;
  1962 		    }
  1963                     if (!isemptyline && !isellipsis)
  1964 		    {
  1965                         if (pswit[ECHO_SWITCH])
  1966 			    printf("\n%s\n",aline);
  1967                         if (!pswit[OVERVIEW_SWITCH])
  1968                             printf("    Line %ld column %d - "
  1969 			      "Spaced punctuation?\n",linecnt,i+1);
  1970                         else
  1971                             cnt_punct++;
  1972 		    }
  1973 		}
  1974 	    }
  1975 	}
  1976         /* Split out the characters that CANNOT be preceded by space. */
  1977         llen=strlen(aline);
  1978         for (i=1;i<llen;i++)
  1979 	{
  1980 	    /* for each character in the line after the first */
  1981             if (strchr("?!,;:",aline[i]))
  1982 	    {
  1983 		/* if it's punctuation that _cannot_ have a space before it */
  1984                 if (aline[i-1]==CHAR_SPACE && !isemptyline &&
  1985 		  aline[i+1]!=CHAR_SPACE)
  1986 		{
  1987 		    /*
  1988 		     * If aline[i+1] DOES == space,
  1989 		     * it was already reported just above.
  1990 		     */
  1991                     if (pswit[ECHO_SWITCH])
  1992 			printf("\n%s\n",aline);
  1993                     if (!pswit[OVERVIEW_SWITCH])
  1994                         printf("    Line %ld column %d - Spaced punctuation?\n",
  1995 			  linecnt,i+1);
  1996                     else
  1997                         cnt_punct++;
  1998 		}
  1999 	    }
  2000 	}
  2001         /*
  2002 	 * Special case " .X" where X is any alpha.
  2003          * This plugs a hole in the acronym code above.
  2004 	 * Inelegant, but maintainable.
  2005 	 */
  2006         llen=strlen(aline);
  2007         for (i=1;i<llen;i++)
  2008 	{
  2009 	    /* for each character in the line after the first */
  2010             if (aline[i]=='.')
  2011 	    {
  2012 		/* if it's a period */
  2013                 if (aline[i-1]==CHAR_SPACE && gcisalpha(aline[i+1]))
  2014 		{
  2015 		    /*
  2016 		     * If the period follows a space and
  2017 		     * is followed by a letter.
  2018 		     */
  2019                     if (pswit[ECHO_SWITCH])
  2020 			printf("\n%s\n",aline);
  2021                     if (!pswit[OVERVIEW_SWITCH])
  2022                         printf("    Line %ld column %d - Spaced punctuation?\n",
  2023 			  linecnt,i+1);
  2024                     else
  2025                         cnt_punct++;
  2026 		}
  2027 	    }
  2028 	}
  2029         for (i=1;i<llen;i++)
  2030 	{
  2031 	    /* for each character in the line after the first */
  2032             if (aline[i]==CHAR_DQUOTE)
  2033 	    {
  2034                 if (!strchr(" _-.'`,;:!/([{?}])",aline[i-1]) &&
  2035 		  !strchr(" _-.'`,;:!/([{?}])",aline[i+1]) && aline[i+1] ||
  2036 		  !strchr(" _-([{'`",aline[i-1]) && gcisalpha(aline[i+1]))
  2037 		{
  2038 		    if (pswit[ECHO_SWITCH])
  2039 			printf("\n%s\n",aline);
  2040 		    if (!pswit[OVERVIEW_SWITCH])
  2041 			printf("    Line %ld column %d - Unspaced quotes?\n",
  2042 			  linecnt,i+1);
  2043 		    else
  2044 			cnt_punct++;
  2045 		}
  2046 	    }
  2047 	}
  2048         /* Check parity of quotes. */
  2049         for (s=aline;*s;s++)
  2050 	{
  2051             if (*s==CHAR_DQUOTE)
  2052 	    {
  2053                 if (!(dquotepar=!dquotepar))
  2054 		{
  2055 		    /* parity even */
  2056                     if (!strchr("_-.'`/,;:!?)]} ",s[1]))
  2057 		    {
  2058                         if (pswit[ECHO_SWITCH])
  2059 			    printf("\n%s\n",aline);
  2060                         if (!pswit[OVERVIEW_SWITCH])
  2061                             printf("    Line %ld column %d - "
  2062 			      "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
  2063                         else
  2064                             cnt_punct++;
  2065 		    }
  2066 		}
  2067                 else
  2068 		{
  2069 		    /* parity odd */
  2070                     if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
  2071 		      !strchr("_-/.'`([{$",s[1]) || !s[1])
  2072 		    {
  2073                         if (pswit[ECHO_SWITCH])
  2074 			    printf("\n%s\n",aline);
  2075                         if (!pswit[OVERVIEW_SWITCH])
  2076                             printf("    Line %ld column %d - "
  2077 			      "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
  2078                         else
  2079                             cnt_punct++;
  2080 		    }
  2081 		}
  2082 	    }
  2083 	}
  2084 	if (*aline==CHAR_DQUOTE)
  2085 	{
  2086 	    if (strchr(",;:!?)]} ",aline[1]))
  2087 	    {
  2088 		if (pswit[ECHO_SWITCH])
  2089 		    printf("\n%s\n",aline);
  2090 		if (!pswit[OVERVIEW_SWITCH])
  2091 		    printf("    Line %ld column 1 - Wrongspaced quotes?\n",
  2092 		      linecnt);
  2093 		else
  2094 		    cnt_punct++;
  2095 	    }
  2096 	}
  2097         if (pswit[SQUOTE_SWITCH])
  2098 	{
  2099             for (s=aline;*s;s++)
  2100 	    {
  2101                 if ((*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE) &&
  2102 		  (s==aline || s>aline && !gcisalpha(s[-1]) ||
  2103 		  !gcisalpha(s[1])))
  2104 		{
  2105                     if (!(squotepar=!squotepar))
  2106 		    {
  2107 			/* parity even */
  2108                         if (!strchr("_-.'`/\",;:!?)]} ",s[1]))
  2109 			{
  2110                             if (pswit[ECHO_SWITCH])
  2111 				printf("\n%s\n",aline);
  2112                             if (!pswit[OVERVIEW_SWITCH])
  2113                                 printf("    Line %ld column %d - "
  2114 				  "Wrongspaced singlequotes?\n",
  2115 				  linecnt,(int)(s-aline)+1);
  2116                             else
  2117                                 cnt_punct++;
  2118 			}
  2119 		    }
  2120                     else
  2121 		    {
  2122 			/* parity odd */
  2123                         if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
  2124 			  !strchr("_-/\".'`",s[1]) || !s[1])
  2125 			{
  2126                             if (pswit[ECHO_SWITCH])
  2127 				printf("\n%s\n",aline);
  2128                             if (!pswit[OVERVIEW_SWITCH])
  2129                                 printf("    Line %ld column %d - "
  2130 				  "Wrongspaced singlequotes?\n",
  2131 				  linecnt,(int)(s-aline)+1);
  2132                             else
  2133                                 cnt_punct++;
  2134 			}
  2135 		    }
  2136 		}
  2137 	    }
  2138 	}
  2139         /*
  2140 	 * Look for double punctuation like ,. or ,,
  2141          * Thanks to DW for the suggestion!
  2142          * In books with references, ".," and ".;" are common
  2143          * e.g. "etc., etc.," and vol. 1.; vol 3.;
  2144          * OTOH, from my initial tests, there are also fairly
  2145          * common errors. What to do? Make these cases paranoid?
  2146          * ".," is the most common, so warnings->dotcomma is used
  2147          * to suppress detailed reporting if it occurs often.
  2148 	 */
  2149         llen=strlen(aline);
  2150         for (i=0;i<llen;i++)
  2151 	{
  2152 	    /* for each punctuation character in the line */
  2153             if (strchr(".?!,;:",aline[i]) && (strchr(".?!,;:",aline[i+1])) &&
  2154 	      aline[i] && aline[i+1])
  2155 	    {
  2156 		/* followed by punctuation, it's a query, unless . . . */
  2157                 if (aline[i]==aline[i+1] && (aline[i]=='.' || aline[i]=='?' ||
  2158 		  aline[i]=='!') ||
  2159 		  !warnings->dotcomma && aline[i]=='.' && aline[i+1]==',' ||
  2160 		  warnings->isFrench && !strncmp(aline+i,",...",4) ||
  2161 		  warnings->isFrench && !strncmp(aline+i,"...,",4) ||
  2162 		  warnings->isFrench && !strncmp(aline+i,";...",4) ||
  2163 		  warnings->isFrench && !strncmp(aline+i,"...;",4) ||
  2164 		  warnings->isFrench && !strncmp(aline+i,":...",4) ||
  2165 		  warnings->isFrench && !strncmp(aline+i,"...:",4) ||
  2166 		  warnings->isFrench && !strncmp(aline+i,"!...",4) ||
  2167 		  warnings->isFrench && !strncmp(aline+i,"...!",4) ||
  2168 		  warnings->isFrench && !strncmp(aline+i,"?...",4) ||
  2169 		  warnings->isFrench && !strncmp(aline+i,"...?",4))
  2170 		{
  2171 		    if (warnings->isFrench && !strncmp(aline+i,",...",4) ||
  2172 		      warnings->isFrench && !strncmp(aline+i,"...,",4) ||
  2173 		      warnings->isFrench && !strncmp(aline+i,";...",4) ||
  2174 		      warnings->isFrench && !strncmp(aline+i,"...;",4) ||
  2175 		      warnings->isFrench && !strncmp(aline+i,":...",4) ||
  2176 		      warnings->isFrench && !strncmp(aline+i,"...:",4) ||
  2177 		      warnings->isFrench && !strncmp(aline+i,"!...",4) ||
  2178 		      warnings->isFrench && !strncmp(aline+i,"...!",4) ||
  2179 		      warnings->isFrench && !strncmp(aline+i,"?...",4) ||
  2180 		      warnings->isFrench && !strncmp(aline+i,"...?",4))
  2181 			i+=4;
  2182 		    ; /* do nothing for .. !! and ?? which can be legit */
  2183 		}
  2184                 else
  2185 		{
  2186                     if (pswit[ECHO_SWITCH])
  2187 			printf("\n%s\n",aline);
  2188                     if (!pswit[OVERVIEW_SWITCH])
  2189                         printf("    Line %ld column %d - Double punctuation?\n",
  2190 			  linecnt,i+1);
  2191                     else
  2192                         cnt_punct++;
  2193 		}
  2194 	    }
  2195 	}
  2196         s=aline;
  2197         while (strstr(s," \" "))
  2198 	{
  2199             if (pswit[ECHO_SWITCH])
  2200 		printf("\n%s\n",aline);
  2201             if (!pswit[OVERVIEW_SWITCH])
  2202                 printf("    Line %ld column %d - Spaced doublequote?\n",
  2203 		  linecnt,(int)(strstr(s," \" ")-aline+1));
  2204             else
  2205                 cnt_punct++;
  2206             s=strstr(s," \" ")+2;
  2207 	}
  2208         s=aline;
  2209         while (strstr(s," ' "))
  2210 	{
  2211             if (pswit[ECHO_SWITCH])
  2212 		printf("\n%s\n",aline);
  2213             if (!pswit[OVERVIEW_SWITCH])
  2214                 printf("    Line %ld column %d - Spaced singlequote?\n",
  2215 		  linecnt,(int)(strstr(s," ' ")-aline+1));
  2216             else
  2217                 cnt_punct++;
  2218             s=strstr(s," ' ")+2;
  2219 	}
  2220         s=aline;
  2221         while (strstr(s," ` "))
  2222 	{
  2223             if (pswit[ECHO_SWITCH])
  2224 		printf("\n%s\n",aline);
  2225             if (!pswit[OVERVIEW_SWITCH])
  2226                 printf("    Line %ld column %d - Spaced singlequote?\n",
  2227 		  linecnt,(int)(strstr(s," ` ")-aline+1));
  2228             else
  2229                 cnt_punct++;
  2230             s=strstr(s," ` ")+2;
  2231 	}
  2232         /* check special case of 'S instead of 's at end of word */
  2233         s=aline+1;
  2234         while (*s)
  2235 	{
  2236             if (*s==CHAR_SQUOTE && s[1]=='S' && s[-1]>='a' && s[-1]<='z')
  2237 	    {
  2238                 if (pswit[ECHO_SWITCH])
  2239 		    printf("\n%s\n",aline);
  2240                 if (!pswit[OVERVIEW_SWITCH])
  2241                     printf("    Line %ld column %d - Capital \"S\"?\n",
  2242 		      linecnt,(int)(s-aline+2));
  2243                 else
  2244                     cnt_punct++;
  2245 	    }
  2246             s++;
  2247 	}
  2248         /*
  2249 	 * Now check special cases - start and end of line -
  2250          * for single and double quotes. Start is sometimes [sic]
  2251          * but better to query it anyway.
  2252          * While we're here, check for dash at end of line.
  2253 	 */
  2254         llen=strlen(aline);
  2255         if (llen>1)
  2256 	{
  2257             if (aline[llen-1]==CHAR_DQUOTE || aline[llen-1]==CHAR_SQUOTE ||
  2258 	      aline[llen-1]==CHAR_OPEN_SQUOTE)
  2259                 if (aline[llen-2]==CHAR_SPACE)
  2260 		{
  2261                     if (pswit[ECHO_SWITCH])
  2262 			printf("\n%s\n",aline);
  2263                     if (!pswit[OVERVIEW_SWITCH])
  2264                         printf("    Line %ld column %d - Spaced quote?\n",
  2265 			  linecnt,llen);
  2266                     else
  2267                         cnt_punct++;
  2268 		}
  2269             if ((aline[0]==CHAR_SQUOTE || aline[0]==CHAR_OPEN_SQUOTE) &&
  2270 	      aline[1]==CHAR_SPACE)
  2271 	    {
  2272 		if (pswit[ECHO_SWITCH])
  2273 		    printf("\n%s\n",aline);
  2274 		if (!pswit[OVERVIEW_SWITCH])
  2275 		    printf("    Line %ld column 1 - Spaced quote?\n",linecnt);
  2276 		else
  2277 		    cnt_punct++;
  2278 	    }
  2279             /*
  2280 	     * Dash at end of line may well be legit - paranoid mode only
  2281              * and don't report em-dash at line-end.
  2282 	     */
  2283             if (pswit[PARANOID_SWITCH] && warnings->hyphen)
  2284 	    {
  2285                 for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
  2286 		    ;
  2287                 if (aline[i]=='-' && aline[i-1]!='-')
  2288 		{
  2289                     if (pswit[ECHO_SWITCH])
  2290 			printf("\n%s\n",aline);
  2291                     if (!pswit[OVERVIEW_SWITCH])
  2292                         printf("    Line %ld column %d - "
  2293 			  "Hyphen at end of line?\n",linecnt,i);
  2294 		}
  2295 	    }
  2296 	}
  2297         /*
  2298 	 * Brackets are often unspaced, but shouldn't be surrounded by alpha.
  2299          * If so, suspect a scanno like "a]most".
  2300 	 */
  2301         llen=strlen(aline);
  2302         for (i=1;i<llen-1;i++)
  2303 	{
  2304 	    /* for each bracket character in the line except 1st & last */
  2305             if (strchr("{[()]}",aline[i]) && gcisalpha(aline[i-1]) &&
  2306 	      gcisalpha(aline[i+1]))
  2307 	    {
  2308                 if (pswit[ECHO_SWITCH])
  2309 		    printf("\n%s\n",aline);
  2310                 if (!pswit[OVERVIEW_SWITCH])
  2311                     printf("    Line %ld column %d - Unspaced bracket?\n",
  2312 		      linecnt,i);
  2313                 else
  2314                     cnt_punct++;
  2315 	    }
  2316 	}
  2317         llen=strlen(aline);
  2318         if (warnings->endquote)
  2319 	{
  2320             for (i=1;i<llen;i++)
  2321 	    {
  2322 		/* for each character in the line except 1st */
  2323                 if (aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
  2324 		{
  2325 		    if (pswit[ECHO_SWITCH])
  2326 			printf("\n%s\n",aline);
  2327 		    if (!pswit[OVERVIEW_SWITCH])
  2328 			printf("    Line %ld column %d - "
  2329 			  "endquote missing punctuation?\n",linecnt,i);
  2330 		    else
  2331 			cnt_punct++;
  2332 		}
  2333 	    }
  2334 	}
  2335 	/*
  2336          * Check for <HTML TAG>.
  2337          * If there is a < in the line, followed at some point
  2338          * by a > then we suspect HTML.
  2339 	 */
  2340         if (strstr(aline,"<") && strstr(aline,">"))
  2341 	{
  2342             i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);
  2343             if (i>0)
  2344 	    {
  2345                 strncpy(wrk,strstr(aline,"<"),i);
  2346                 wrk[i]=0;
  2347                 if (pswit[ECHO_SWITCH])
  2348 		    printf("\n%s\n",aline);
  2349                 if (!pswit[OVERVIEW_SWITCH])
  2350                     printf("    Line %ld column %d - HTML Tag? %s \n",
  2351 		      linecnt,(int)(strstr(aline,"<")-aline)+1,wrk);
  2352                 else
  2353                     cnt_html++;
  2354 	    }
  2355 	}
  2356         /*
  2357 	 * Check for &symbol; HTML.
  2358          * If there is a & in the line, followed at
  2359          * some point by a ; then we suspect HTML.
  2360 	 */
  2361         if (strstr(aline,"&") && strstr(aline,";"))
  2362 	{
  2363             i=(int)(strstr(aline,";")-strstr(aline,"&")+1);
  2364             for (s=strstr(aline,"&");s<strstr(aline,";");s++)   
  2365                 if (*s==CHAR_SPACE)
  2366 		    i=0;                /* Don't report "Jones & Son;" */
  2367             if (i>0)
  2368 	    {
  2369                 strncpy(wrk,strstr(aline,"&"),i);
  2370                 wrk[i]=0;
  2371                 if (pswit[ECHO_SWITCH])
  2372 		    printf("\n%s\n",aline);
  2373                 if (!pswit[OVERVIEW_SWITCH])
  2374                     printf("    Line %ld column %d - HTML symbol? %s \n",
  2375 		      linecnt,(int)(strstr(aline,"&")-aline)+1,wrk);
  2376                 else
  2377                     cnt_html++;
  2378 	    }
  2379 	}
  2380         /*
  2381 	 * At end of paragraph, check for mismatched quotes.
  2382          * We don't want to report an error immediately, since it is a
  2383          * common convention to omit the quotes at end of paragraph if
  2384          * the next paragraph is a continuation of the same speaker.
  2385          * Where this is the case, the next para should begin with a
  2386          * quote, so we store the warning message and only display it
  2387          * at the top of the next iteration if the new para doesn't
  2388          * start with a quote.
  2389          * The -p switch overrides this default, and warns of unclosed
  2390          * quotes on _every_ paragraph, whether the next begins with a
  2391          * quote or not.
  2392 	 */
  2393         if (isemptyline)
  2394 	{
  2395 	    /* end of para - add up the totals */
  2396             if (quot%2)
  2397                 sprintf(dquote_err,"    Line %ld - Mismatched quotes\n",
  2398 		  linecnt);
  2399             if (pswit[SQUOTE_SWITCH] && open_single_quote &&
  2400 	      open_single_quote!=close_single_quote)
  2401                 sprintf(squote_err,"    Line %ld - Mismatched singlequotes?\n",
  2402 		  linecnt);
  2403             if (pswit[SQUOTE_SWITCH] && open_single_quote &&
  2404 	      open_single_quote!=close_single_quote &&
  2405 	      open_single_quote!=close_single_quote+1)
  2406 		/*
  2407 		 * Flag it to be noted regardless of the
  2408 		 * first char of the next para.
  2409 		 */
  2410                 squot=1;
  2411             if (r_brack)
  2412                 sprintf(rbrack_err,"    Line %ld - "
  2413 		  "Mismatched round brackets?\n",linecnt);
  2414             if (s_brack)
  2415                 sprintf(sbrack_err,"    Line %ld - "
  2416 		  "Mismatched square brackets?\n",linecnt);
  2417             if (c_brack)
  2418                 sprintf(cbrack_err,"    Line %ld - "
  2419 		  "Mismatched curly brackets?\n",linecnt);
  2420             if (c_unders%2)
  2421                 sprintf(unders_err,"    Line %ld - Mismatched underscores?\n",
  2422 		  linecnt);
  2423             quot=s_brack=c_brack=r_brack=c_unders=open_single_quote=
  2424 	      close_single_quote=0;
  2425 	    /* let the next iteration know that it's starting a new para */
  2426             isnewpara=1;
  2427 	}
  2428         /*
  2429 	 * Check for omitted punctuation at end of paragraph by working back
  2430 	 * through prevline. DW.
  2431          * Need to check this only for "normal" paras.
  2432          * So what is a "normal" para?
  2433          *    Not normal if one-liner (chapter headings, etc.)
  2434          *    Not normal if doesn't contain at least one locase letter
  2435          *    Not normal if starts with space
  2436 	 */
  2437         if (isemptyline)
  2438 	{
  2439 	    /* end of para */
  2440             for (s=prevline,i=0;*s && !i;s++)
  2441                 if (gcisletter(*s))
  2442 		    /* use i to indicate the presence of a letter on the line */
  2443                     i=1;
  2444             /*
  2445 	     * This next "if" is a problem.
  2446              * If we say "start_para_line <= linecnt - 1", that includes
  2447 	     * one-line "paragraphs" like chapter heads. Lotsa false positives.
  2448              * If we say "start_para_line < linecnt - 1" it doesn't, but then it
  2449              * misses genuine one-line paragraphs.
  2450 	     */
  2451             if (i && lastblen>2 && start_para_line<linecnt-1 &&
  2452 	      *prevline>CHAR_SPACE)
  2453 	    {
  2454                 for (i=strlen(prevline)-1;
  2455 		  (prevline[i]==CHAR_DQUOTE || prevline[i]==CHAR_SQUOTE) &&
  2456 		  prevline[i]>CHAR_SPACE && i>0;
  2457 		  i--)
  2458 		    ;
  2459                 for (;i>0;i--)
  2460 		{
  2461                     if (gcisalpha(prevline[i]))
  2462 		    {
  2463                         if (pswit[ECHO_SWITCH])
  2464 			    printf("\n%s\n",prevline);
  2465                         if (!pswit[OVERVIEW_SWITCH])
  2466                             printf("    Line %ld column %d - "
  2467 			      "No punctuation at para end?\n",
  2468 			      linecnt-1,strlen(prevline));
  2469                         else
  2470                             cnt_punct++;
  2471                         break;
  2472 		    }
  2473                     if (strchr("-.:!([{?}])",prevline[i]))
  2474                         break;
  2475 		}
  2476 	    }
  2477 	}
  2478         strcpy(prevline,aline);
  2479     }
  2480     fclose(infile);
  2481     if (!pswit[OVERVIEW_SWITCH])
  2482         for (i=0;i<MAX_QWORD;i++)
  2483             if (dupcnt[i])
  2484                 printf("\nNote: Queried word %s was duplicated %d time%s\n",
  2485 		  qword[i],dupcnt[i],"s");
  2486 }
  2487 
  2488 /*
  2489  * flgets:
  2490  *
  2491  * Get one line from the input stream, checking for
  2492  * the existence of exactly one CR/LF line-end per line.
  2493  *
  2494  * Returns: a pointer to the line.
  2495  */
  2496 char *flgets(char *theline,int maxlen,FILE *thefile,long lcnt)
  2497 {
  2498     char c;
  2499     int len,isCR,cint;
  2500     *theline=0;
  2501     len=isCR=0;
  2502     c=cint=fgetc(thefile);
  2503     do
  2504     {
  2505         if (cint==EOF)
  2506             return NULL;
  2507 	/* either way, it's end of line */
  2508         if (c==10)
  2509 	{
  2510             if (isCR)
  2511                 break;
  2512             else
  2513 	    {
  2514 		/* Error - a LF without a preceding CR */
  2515                 if (pswit[LINE_END_SWITCH])
  2516 		{
  2517                     if (pswit[ECHO_SWITCH])
  2518 			printf("\n%s\n",theline);
  2519                     if (!pswit[OVERVIEW_SWITCH])
  2520                         printf("    Line %ld - No CR?\n",lcnt);
  2521                     else
  2522                         cnt_lineend++;
  2523 		}
  2524                 break;
  2525 	    }
  2526 	}
  2527         if (c==13)
  2528 	{
  2529             if (isCR)
  2530 	    {
  2531 		/* Error - two successive CRs */
  2532                 if (pswit[LINE_END_SWITCH])
  2533 		{
  2534                     if (pswit[ECHO_SWITCH])
  2535 			printf("\n%s\n",theline);
  2536                     if (!pswit[OVERVIEW_SWITCH])
  2537                         printf("    Line %ld - Two successive CRs?\n",lcnt);
  2538                     else
  2539                         cnt_lineend++;
  2540 		}
  2541 	    }
  2542             isCR=1;
  2543 	}
  2544         else
  2545 	{
  2546             if (pswit[LINE_END_SWITCH] && isCR)
  2547 	    {
  2548                 if (pswit[ECHO_SWITCH])
  2549 		    printf("\n%s\n",theline);
  2550                 if (!pswit[OVERVIEW_SWITCH])
  2551                     printf("    Line %ld column %d - CR without LF?\n",
  2552 		      lcnt,len+1);
  2553                 else
  2554                     cnt_lineend++;
  2555 	    }
  2556             theline[len]=c;
  2557             len++;
  2558             theline[len]=0;
  2559             isCR=0;
  2560 	}
  2561         c=cint=fgetc(thefile);
  2562     } while(len<maxlen);
  2563     if (pswit[MARKUP_SWITCH])  
  2564         postprocess_for_HTML(theline);
  2565     if (pswit[DP_SWITCH])  
  2566         postprocess_for_DP(theline);
  2567     return theline;
  2568 }
  2569 
  2570 /*
  2571  * mixdigit:
  2572  *
  2573  * Takes a "word" as a parameter, and checks whether it
  2574  * contains a mixture of alpha and digits. Generally, this is an
  2575  * error, but may not be for cases like 4th or L5 12s. 3d.
  2576  *
  2577  * Returns: 0 if no error found, 1 if error.
  2578  */
  2579 int mixdigit(char *checkword)
  2580 {
  2581     int wehaveadigit,wehavealetter,firstdigits,query,wl;
  2582     char *s;
  2583     wehaveadigit=wehavealetter=query=0;
  2584     for (s=checkword;*s;s++)
  2585         if (gcisalpha(*s))
  2586             wehavealetter=1;
  2587         else
  2588             if (gcisdigit(*s))
  2589                 wehaveadigit=1;
  2590     if (wehaveadigit && wehavealetter)
  2591     {
  2592 	/* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
  2593         query=1;
  2594         wl=strlen(checkword);
  2595         for (firstdigits=0;gcisdigit(checkword[firstdigits]);firstdigits++)
  2596             ;
  2597         /* digits, ending in st, rd, nd, th of either case */
  2598         if (firstdigits+2==wl && (matchword(checkword+wl-2,"st") ||
  2599 	  matchword(checkword+wl-2,"rd") || matchword(checkword+wl-2,"nd") ||
  2600 	  matchword(checkword+wl-2,"th")))
  2601 	    query=0;
  2602         if (firstdigits+3==wl && (matchword(checkword+wl-3,"sts") ||
  2603 	  matchword(checkword+wl-3,"rds") || matchword(checkword+wl-3,"nds") ||
  2604 	  matchword(checkword+wl-3,"ths")))
  2605 	    query=0;
  2606         if (firstdigits+3==wl && (matchword(checkword+wl-4,"stly") ||
  2607 	  matchword(checkword+wl-4,"rdly") ||
  2608 	  matchword(checkword+wl-4,"ndly") || matchword(checkword+wl-4,"thly")))
  2609 	    query=0;
  2610         /* digits, ending in l, L, s or d */
  2611         if (firstdigits+1==wl && (checkword[wl-1]=='l' ||
  2612 	  checkword[wl-1]=='L' || checkword[wl-1]=='s' || checkword[wl-1]=='d'))
  2613 	    query=0;
  2614         /*
  2615 	 * L at the start of a number, representing Britsh pounds, like L500.
  2616          * This is cute. We know the current word is mixeddigit. If the first
  2617          * letter is L, there must be at least one digit following. If both
  2618          * digits and letters follow, we have a genuine error, else we have a
  2619          * capital L followed by digits, and we accept that as a non-error.
  2620 	 */
  2621         if (checkword[0]=='L' && !mixdigit(checkword+1))
  2622 	    query=0;
  2623     }
  2624     return query;
  2625 }
  2626 
  2627 /*
  2628  * getaword:
  2629  *
  2630  * Extracts the first/next "word" from the line, and puts
  2631  * it into "thisword". A word is defined as one English word unit--or
  2632  * at least that's the aim.
  2633  *
  2634  * Returns: a pointer to the position in the line where we will start
  2635  *          looking for the next word.
  2636  */
  2637 char *getaword(char *fromline,char *thisword)
  2638 {
  2639     int i,wordlen;
  2640     char *s;
  2641     wordlen=0;
  2642     for (;!gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline;
  2643       fromline++)
  2644 	;
  2645     /*
  2646      * Use a look-ahead to handle exceptions for numbers like 1,000 and 1.35.
  2647      * Especially yucky is the case of L1,000
  2648      * This section looks for a pattern of characters including a digit
  2649      * followed by a comma or period followed by one or more digits.
  2650      * If found, it returns this whole pattern as a word; otherwise we discard
  2651      * the results and resume our normal programming.
  2652      */
  2653     s=fromline;
  2654     for (;(gcisdigit(*s) || gcisalpha(*s) || *s==',' || *s=='.') &&
  2655       wordlen<MAXWORDLEN;s++)
  2656     {
  2657 	thisword[wordlen]=*s;
  2658         wordlen++;
  2659     }
  2660     thisword[wordlen]=0;
  2661     for (i=1;i<wordlen-1;i++)
  2662     {
  2663         if (thisword[i]=='.' || thisword[i]==',')
  2664 	{
  2665             if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1]))
  2666 	    {
  2667                 fromline=s;
  2668                 return fromline;
  2669 	    }
  2670 	}
  2671     }
  2672     /* we didn't find a punctuated number - do the regular getword thing */
  2673     wordlen=0;
  2674     for (;(gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline=='\'') &&
  2675       wordlen<MAXWORDLEN;fromline++)
  2676     {
  2677         thisword[wordlen]=*fromline;
  2678         wordlen++;
  2679     }
  2680     thisword[wordlen]=0;
  2681     return fromline;
  2682 }
  2683 
  2684 /*
  2685  * matchword:
  2686  *
  2687  * A case-insensitive string matcher.
  2688  */
  2689 int matchword(char *checkfor,char *thisword)
  2690 {
  2691     unsigned int ismatch,i;
  2692     if (strlen(checkfor)!=strlen(thisword))
  2693 	return 0;
  2694     ismatch=1;     /* assume a match until we find a difference */
  2695     for (i=0;i<strlen(checkfor);i++)
  2696         if (toupper(checkfor[i])!=toupper(thisword[i]))
  2697             ismatch=0;
  2698     return ismatch;
  2699 }
  2700 
  2701 /*
  2702  * lowerit:
  2703  *
  2704  * Lowercase the line.
  2705  */
  2706 
  2707 void lowerit(char *theline)
  2708 {
  2709     for (;*theline;theline++)
  2710         if (*theline>='A' && *theline<='Z')
  2711             *theline+=32;
  2712 }
  2713 
  2714 /*
  2715  * isroman:
  2716  *
  2717  * Is this word a Roman Numeral?
  2718  *
  2719  * It doesn't actually validate that the number is a valid Roman Numeral--for
  2720  * example it will pass MXXXXXXXXXX as a valid Roman Numeral, but that's not
  2721  * what we're here to do. If it passes this, it LOOKS like a Roman numeral.
  2722  * Anyway, the actual Romans were pretty tolerant of bad arithmetic, or
  2723  * expressions thereof, except when it came to taxes. Allow any number of M,
  2724  * an optional D, an optional CM or CD, any number of optional Cs, an optional
  2725  * XL or an optional XC, an optional IX or IV, an optional V and any number
  2726  * of optional Is.
  2727  */
  2728 int isroman(char *t)
  2729 {
  2730     char *s;
  2731     if (!t || !*t)
  2732 	return 0;
  2733     s=t;
  2734     while (*t=='m' && *t)
  2735 	t++;
  2736     if (*t=='d')
  2737 	t++;
  2738     if (*t=='c' && t[1]=='m')
  2739 	t+=2;
  2740     if (*t=='c' && t[1]=='d')
  2741 	t+=2;
  2742     while (*t=='c' && *t)
  2743 	t++;
  2744     if (*t=='x' && t[1]=='l')
  2745 	t+=2;
  2746     if (*t=='x' && t[1]=='c')
  2747 	t+=2;
  2748     if (*t=='l')
  2749 	t++;
  2750     while (*t=='x' && *t)
  2751 	t++;
  2752     if (*t=='i' && t[1]=='x')
  2753 	t+=2;
  2754     if (*t=='i' && t[1]=='v')
  2755 	t+=2;
  2756     if (*t=='v')
  2757 	t++;
  2758     while (*t=='i' && *t)
  2759 	t++;
  2760     return !*t;
  2761 }
  2762 
  2763 /*
  2764  * gcisalpha:
  2765  *
  2766  * A version of isalpha() that is somewhat lenient on 8-bit texts.
  2767  * If we use the standard function, 8-bit accented characters break
  2768  * words, so that tete with accented characters appears to be two words, "t"
  2769  * and "t", with 8-bit characters between them. This causes over-reporting of
  2770  * errors. gcisalpha() recognizes accented letters from the CP1252 (Windows)
  2771  * and ISO-8859-1 character sets, which are the most common PG 8-bit types.
  2772  */
  2773 int gcisalpha(unsigned char c)
  2774 {
  2775     if (c>='a' && c<='z')
  2776 	return 1;
  2777     if (c>='A' && c<='Z')
  2778 	return 1;
  2779     if (c<140)
  2780 	return 0;
  2781     if (c>=192 && c!=208 && c!=215 && c!=222 && c!=240 && c!=247 && c!=254)
  2782 	return 1;
  2783     if (c==140 || c==142 || c==156 || c==158 || c==159)
  2784 	return 1;
  2785     return 0;
  2786 }
  2787 
  2788 /*
  2789  * gcisdigit:
  2790  *
  2791  * A version of isdigit() that doesn't get confused in 8-bit texts.
  2792  */
  2793 int gcisdigit(unsigned char c)
  2794 {   
  2795     return c>='0' && c<='9';
  2796 }
  2797 
  2798 /*
  2799  * gcisletter:
  2800  *
  2801  * A version of isletter() that doesn't get confused in 8-bit texts.
  2802  * NB: this is ISO-8891-1-specific.
  2803  */
  2804 int gcisletter(unsigned char c)
  2805 {   
  2806     return c>='A' && c<='Z' || c>='a' && c<='z' || c>=192;
  2807 }
  2808 
  2809 /*
  2810  * gcstrchr:
  2811  *
  2812  * Wraps strchr to return NULL if the character being searched for is zero.
  2813  */
  2814 char *gcstrchr(char *s,char c)
  2815 {
  2816     if (!c)
  2817 	return NULL;
  2818     return strchr(s,c);
  2819 }
  2820 
  2821 /*
  2822  * postprocess_for_DP:
  2823  *
  2824  * Invoked with the -d switch from flgets().
  2825  * It simply "removes" from the line a hard-coded set of common
  2826  * DP-specific tags, so that the line passed to the main routine has
  2827  * been pre-cleaned of DP markup.
  2828  */
  2829 void postprocess_for_DP(char *theline)
  2830 {
  2831     char *s,*t;
  2832     int i;
  2833     if (!*theline) 
  2834         return;
  2835     for (i=0;*DPmarkup[i];i++)
  2836     {
  2837         s=strstr(theline,DPmarkup[i]);
  2838         while (s)
  2839 	{
  2840             t=s+strlen(DPmarkup[i]);
  2841             while (*t)
  2842 	    {
  2843                 *s=*t;
  2844                 t++;
  2845 		s++;
  2846 	    }
  2847             *s=0;
  2848             s=strstr(theline,DPmarkup[i]);
  2849 	}
  2850     }
  2851 }
  2852 
  2853 /*
  2854  * postprocess_for_HTML:
  2855  *
  2856  * Invoked with the -m switch from flgets().
  2857  * It simply "removes" from the line a hard-coded set of common
  2858  * HTML tags and "replaces" a hard-coded set of common HTML
  2859  * entities, so that the line passed to the main routine has
  2860  * been pre-cleaned of HTML.
  2861  */
  2862 void postprocess_for_HTML(char *theline)
  2863 {
  2864     if (strstr(theline,"<") && strstr(theline,">"))
  2865         while (losemarkup(theline))
  2866             ;
  2867     while (loseentities(theline))
  2868         ;
  2869 }
  2870 
  2871 char *losemarkup(char *theline)
  2872 {
  2873     char *s,*t;
  2874     int i;
  2875     if (!*theline) 
  2876         return NULL;
  2877     s=strstr(theline,"<");
  2878     t=strstr(theline,">");
  2879     if (!s || !t)
  2880 	return NULL;
  2881     for (i=0;*markup[i];i++)
  2882         if (!tagcomp(s+1,markup[i]))
  2883 	{
  2884             if (!t[1])
  2885 	    {
  2886                 *s=0;
  2887                 return s;
  2888 	    }
  2889             else if (t>s)
  2890 	    {
  2891 		strcpy(s,t+1);
  2892 		return s;
  2893 	    }
  2894         }
  2895     /* It's an unrecognized <xxx>. */
  2896     return NULL;
  2897 }
  2898 
  2899 char *loseentities(char *theline)
  2900 {
  2901     int i;
  2902     char *s,*t;
  2903     if (!*theline) 
  2904         return NULL;
  2905     for (i=0;*entities[i].htmlent;i++)
  2906     {
  2907         s=strstr(theline,entities[i].htmlent);
  2908         if (s)
  2909 	{
  2910             t=malloc((size_t)strlen(s));
  2911             if (!t)
  2912 		return NULL;
  2913             strcpy(t,s+strlen(entities[i].htmlent));
  2914             strcpy(s,entities[i].textent);
  2915             strcat(s,t);
  2916             free(t);
  2917             return theline;
  2918 	}
  2919     }
  2920     for (i=0;*entities[i].htmlnum;i++)
  2921     {
  2922         s=strstr(theline,entities[i].htmlnum);
  2923         if (s)
  2924 	{
  2925             t=malloc((size_t)strlen(s));
  2926             if (!t)
  2927 		return NULL;
  2928             strcpy(t,s+strlen(entities[i].htmlnum));
  2929             strcpy(s,entities[i].textent);
  2930             strcat(s,t);
  2931             free(t);
  2932             return theline;
  2933 	}
  2934     }
  2935     return NULL;
  2936 }
  2937 
  2938 int tagcomp(char *strin,char *basetag)
  2939 {
  2940     char *s,*t;
  2941     s=basetag;
  2942     t=strin;
  2943     if (*t=='/')
  2944 	t++; /* ignore a slash */
  2945     while (*s && *t)
  2946     {
  2947         if (tolower(*s)!=tolower(*t))
  2948 	    return 1;
  2949         s++;
  2950 	t++;
  2951     }
  2952     return 0;
  2953 }
  2954 
  2955 void proghelp()
  2956 {
  2957     fputs("Bookloupe version " PACKAGE_VERSION ".\n",stderr);
  2958     fputs("Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
  2959     fputs("Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>.\n",stderr);
  2960     fputs("Bookloupe comes wih ABSOLUTELY NO WARRANTY. "
  2961       "For details, read the file COPYING.\n",stderr);
  2962     fputs("This is Free Software; "
  2963       "you may redistribute it under certain conditions (GPL);\n",stderr);
  2964     fputs("read the file COPYING for details.\n\n",stderr);
  2965     fputs("Usage is: bookloupe [-setpxloyhud] filename\n",stderr);
  2966     fputs("  where -s checks single quotes, -e suppresses echoing lines, "
  2967       "-t checks typos\n",stderr);
  2968     fputs("  -x (paranoid) switches OFF -t and extra checks, "
  2969       "-l turns OFF line-end checks\n",stderr);
  2970     fputs("  -o just displays overview without detail, "
  2971       "-h echoes header fields\n",stderr);
  2972     fputs("  -v (verbose) unsuppresses duplicate reporting, "
  2973       "-m suppresses markup\n",stderr);
  2974     fputs("  -d ignores DP-specific markup,\n",stderr);
  2975     fputs("  -u uses a file gutcheck.typ to query user-defined "
  2976       "possible typos\n",stderr);
  2977     fputs("Sample usage: bookloupe warpeace.txt \n",stderr);
  2978     fputs("\n",stderr);
  2979     fputs("Bookloupe looks for errors in Project Gutenberg(TM) etexts.\n",
  2980       stderr);
  2981     fputs("Bookloupe queries anything it thinks shouldn't be in a PG text; "
  2982       "non-ASCII\n",stderr);
  2983     fputs("characters like accented letters, "
  2984       "lines longer than 75 or shorter than 55,\n",stderr);
  2985     fputs("unbalanced quotes or brackets, "
  2986       "a variety of badly formatted punctuation, \n",stderr);
  2987     fputs("HTML tags, some likely typos. "
  2988       "It is NOT a substitute for human judgement.\n",stderr);
  2989     fputs("\n",stderr);
  2990 }