bookloupe/bookloupe.c
author ali <ali@juiblex.co.uk>
Fri May 24 22:47:16 2013 +0100 (2013-05-24)
changeset 40 b130f135022d
parent 5 f600b0d1fc5d
child 41 68b1403e2971
permissions -rw-r--r--
Switch to Ali's coding conventions
     1 /*************************************************************************/
     2 /* bookloupe--check for assorted weirdnesses in a PG candidate text file */
     3 /*                                                                       */
     4 /* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>                  */
     5 /* Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>                     */
     6 /*                                                                       */
     7 /* This program is free software; you can redistribute it and/or modify  */
     8 /* it under the terms of the GNU General Public License as published by  */
     9 /* the Free Software Foundation; either version 2 of the License, or     */
    10 /* (at your option) any later version.                                   */
    11 /*                                                                       */
    12 /* This program is distributed in the hope that it will be useful,       */
    13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of        */
    14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          */
    15 /* GNU General Public License for more details.                          */
    16 /*                                                                       */
    17 /* You should have received a copy of the GNU General Public License     */
    18 /* along with this program. If not, see <http://www.gnu.org/licenses/>.  */
    19 /*************************************************************************/
    20 
    21 #include <stdio.h>
    22 #include <stdlib.h>
    23 #include <string.h>
    24 #include <ctype.h>
    25 
/* Fixed sizes for the word and line buffers used throughout the checker. */
#define MAXWORDLEN    80    /* max length of one word             */
#define LINEBUFSIZE 2048    /* buffer size for an input line      */

/* Capacity of usertypo[] and the name of the file main() loads it from. */
#define MAX_USER_TYPOS 1000
#define USERTYPO_FILE "gutcheck.typ"

/*
 * Size of the path buffers (running_from, and usertypo_file in main()).
 * Only defined here when the platform headers have not already supplied it.
 */
#ifndef MAX_PATH
#define MAX_PATH 16384
#endif

/* Shared line buffers: aline receives each line read by main()/procfile(). */
char aline[LINEBUFSIZE];
/* Emptied at the start of procfile(); presumably holds the preceding line. */
char prevline[LINEBUFSIZE];
    38 
/*
 * Common typos.
 *
 * All entries are lower case. The list ends with an empty string, which
 * presumably serves as the end-of-list marker for the lookup code (not
 * visible in this part of the file).
 */
char *typo[] = {
    "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane",
    "nad", "te", "ig", "acn",  "ahve", "alot", "anbd", "andt", "awya", "aywa",
    "bakc", "om", "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt",
    "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr", "hmi", "hse",
    "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd",
    "gerat", "goign", "gruop", "haev", "hda", "hearign", "seeign", "sayign",
    "herat", "hge", "hsa", "hsi", "hte", "htere", "htese", "htey", "htis",
    "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut",
    "loev", "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter",
    "omre", "onyl", "otehr", "otu", "owrk", "owuld", "peice", "peices",
    "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",
    "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe",
    "sohw", "stnad", "stopry", "stoyr", "stpo", "tahn", "taht", "tath",
    "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge", "thier",
    "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne",
    "tirne", "tkae", "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey",
    "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih", "whihc", "whta",
    "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",
    "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking",
    "wtih", "wuould", "wya", "yera", "yeras", "yersa", "yoiu", "youve",
    "ytou", "yuor", "abead", "ahle", "ahout", "ahove", "altbough", "balf",
    "bardly", "bas", "bave", "baving", "bebind", "beld", "belp", "belped",
    "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge",
    "dehates", "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan",
    "hegin", "heing", "helieve", "henefit", "hetter", "hetween", "heyond",
    "hig", "higber", "huild", "huy", "hy", "jobn", "joh", "meanwbile",
    "memher", "memhers", "numher", "numhers", "perbaps", "prohlem", "puhlic",
    "witbout", "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud",
    "prornise", "prornised", "modem", "bo", "heside", "chapteb", "chaptee",
    "se", ""
};
    72 
/*
 * User-defined typos: main() fills this with malloc'd copies of the lines
 * read from USERTYPO_FILE and records the number in use in usertypo_count.
 */
char *usertypo[MAX_USER_TYPOS];
    74 
/*
 * Common abbreviations and other OK words not to query as typos.
 * Entries are lower case; the final empty string closes the list.
 */
char *okword[] = {
    "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm",
    "rd", "sh", "br", "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd",
    "pompeii","hawaii","hawaiian", "hotbed", "heartbeat", "heartbeats",
    "outbid", "outbids", "frostbite", "frostbitten", ""
};
    82 
/*
 * Common abbreviations that cause otherwise unexplained periods.
 * Entries are lower case; the final empty string closes the list.
 */
char *abbrev[] = {
    "cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op",
    "cit", "deg", "min", "chap", "oz", "mme", "mlle", "mssrs", ""
};
    88 
/*
 * Two-letter combinations that rarely if ever start words,
 * but are common scannos or otherwise common letter combinations.
 * The final empty string closes the list.
 */
char *nostart[] = {
    "hr", "hl", "cb", "sb", "tb", "wb", "tl", "tn", "rn", "lt", "tj", ""
};
    96 
/*
 * Two-letter combinations that rarely if ever end words,
 * but are common scannos or otherwise common letter combinations.
 * The final empty string closes the list.
 */
char *noend[] = {
    "cb", "gb", "pb", "sb", "tb", "wh", "fr", "br", "qu", "tw", "gl", "fl",
    "sw", "gr", "sl", "cl", "iy", ""
};
   105 
/*
 * HTML element names, in lower case and without angle brackets.
 * Presumably consulted when deciding whether text inside < > is markup
 * (the consumer is not visible here). The final empty string closes the
 * list.
 */
char *markup[] = {
    "a", "b", "big", "blockquote", "body", "br", "center", "col", "div", "em",
    "font", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i",
    "img", "li", "meta", "ol", "p", "pre", "small", "span", "strong", "sub",
    "sup", "table", "td", "tfoot", "thead", "title", "tr", "tt", "u", "ul", ""
};
   112 
/*
 * Markup sequences written out in full, delimiters included.
 * Presumably the "DP-specific markup" that the D switch ignores -- confirm
 * against postprocess_for_DP(). The final empty string closes the list.
 */
char *DPmarkup[] = {
    "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>", ""
};
   116 
/*
 * Lower-case words, closed by an empty string. Going by the name these are
 * words that should not normally be followed by a comma -- confirm against
 * the code that uses the list. Note that "mrs" appears twice.
 */
char *nocomma[] = {
    "the", "it's", "their", "an", "mrs", "a", "our", "that's", "its", "whose",
    "every", "i'll", "your", "my", "mr", "mrs", "mss", "mssrs", "ft", "pm",
    "st", "dr", "rd", "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "i'm",
    "during", "let", "toward", "among", ""
};
   123 
/*
 * Lower-case words, closed by an empty string. Going by the name these are
 * words that should not normally be followed by a period -- confirm against
 * the code that uses the list.
 */
char *noperiod[] = {
    "every", "i'm", "during", "that's", "their", "your", "our", "my", "or",
    "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether",
    "i'll", "whose", "who", "because", "when", "let", "till", "very", "an",
    "among", "those", "into", "whom", "having", "thence", ""
}; 
   130 
/*
 * Plain and accented lower-case vowels.
 * NOTE(review): the accented letters are raw non-ASCII bytes in the source,
 * so the contents of this array depend on the encoding the file is saved
 * in (one byte per letter only in a single-byte encoding such as
 * ISO-8859-1) -- confirm the intended encoding.
 */
char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü";
   132 
   133 struct {
   134     char *htmlent;
   135     char *htmlnum;
   136     char *textent;
   137 } entities[] = {
   138     "&amp;",	"&#38;",     "&", 
   139     "&lt;",	"&#60;",     "<",
   140     "&gt;",	"&#62;",     ">",
   141     "&deg;",	"&#176;",    " degrees",
   142     "&pound;",	"&#163;",    "L",
   143     "&quot;",	"&#34;",     "\"", /* quotation mark = APL quote */
   144     "&OElig;",	"&#338;",    "OE", /* latin capital ligature OE */
   145     "&oelig;",	"&#339;",    "oe", /* latin small ligature oe */
   146     "&Scaron;",	"&#352;",    "S", /* latin capital letter S with caron */
   147     "&scaron;",	"&#353;",    "s", /* latin small letter s with caron */
   148     "&Yuml;",	"&#376;",    "Y", /* latin capital letter Y with diaeresis */
   149     "&circ;",	"&#710;",    "",  /* modifier letter circumflex accent */
   150     "&tilde;",	"&#732;",    "~", /* small tilde, U+02DC ISOdia */
   151     "&ensp;",	"&#8194;",   " ", /* en space, U+2002 ISOpub */
   152     "&emsp;",	"&#8195;",   " ", /* em space, U+2003 ISOpub */
   153     "&thinsp;",	"&#8201;",   " ", /* thin space, U+2009 ISOpub */
   154     "&ndash;",	"&#8211;",   "-", /* en dash, U+2013 ISOpub */
   155     "&mdash;",	"&#8212;",   "--", /* em dash, U+2014 ISOpub */
   156     "&rsquo;",	"&#8217;",   "'", /* right single quotation mark */
   157     "&sbquo;",	"&#8218;",   "'", /* single low-9 quotation mark */
   158     "&ldquo;",	"&#8220;",   "\"", /* left double quotation mark */
   159     "&rdquo;",	"&#8221;",   "\"", /* right double quotation mark */
   160     "&bdquo;",	"&#8222;",   "\"", /* double low-9 quotation mark */
   161     "&lsaquo;",	"&#8249;",   "\"", /* single left-pointing angle quotation mark */
   162     "&rsaquo;",	"&#8250;",   "\"", /* single right-pointing angle quotation mark */
   163     "&nbsp;",	"&#160;",    " ", /* no-break space = non-breaking space, */
   164     "&iexcl;",	"&#161;",    "!", /* inverted exclamation mark */
   165     "&cent;",	"&#162;",    "c", /* cent sign */
   166     "&pound;",	"&#163;",    "L", /* pound sign */
   167     "&curren;",	"&#164;",    "$", /* currency sign */
   168     "&yen;",	"&#165;",    "Y", /* yen sign = yuan sign */
   169     "&sect;",	"&#167;",    "--", /* section sign */
   170     "&uml;",	"&#168;",    " ", /* diaeresis = spacing diaeresis */
   171     "&copy;",	"&#169;",    "(C) ", /* copyright sign */
   172     "&ordf;",	"&#170;",    " ", /* feminine ordinal indicator */
   173     "&laquo;",	"&#171;",    "\"", /* left-pointing double angle quotation mark */
   174     "&shy;",	"&#173;",    "-", /* soft hyphen = discretionary hyphen */
   175     "&reg;",	"&#174;",    "(R) ", /* registered sign = registered trade mark sign */
   176     "&macr;",	"&#175;",    " ", /* macron = spacing macron = overline */
   177     "&deg;",	"&#176;",    " degrees", /* degree sign */
   178     "&plusmn;",	"&#177;",    "+-", /* plus-minus sign = plus-or-minus sign */
   179     "&sup2;",	"&#178;",    "2", /* superscript two = superscript digit two */
   180     "&sup3;",	"&#179;",    "3", /* superscript three = superscript digit three */
   181     "&acute;",	"&#180;",    " ", /* acute accent = spacing acute */
   182     "&micro;",	"&#181;",    "m", /* micro sign */
   183     "&para;",	"&#182;",    "--", /* pilcrow sign = paragraph sign */
   184     "&cedil;",	"&#184;",    " ", /* cedilla = spacing cedilla */
   185     "&sup1;",	"&#185;",    "1", /* superscript one = superscript digit one */
   186     "&ordm;",	"&#186;",    " ", /* masculine ordinal indicator */
   187     "&raquo;",	"&#187;",    "\"", /* right-pointing double angle quotation mark */
   188     "&frac14;",	"&#188;",    "1/4", /* vulgar fraction one quarter */
   189     "&frac12;",	"&#189;",    "1/2", /* vulgar fraction one half */
   190     "&frac34;",	"&#190;",    "3/4", /* vulgar fraction three quarters */
   191     "&iquest;",	"&#191;",    "?", /* inverted question mark */
   192     "&Agrave;",	"&#192;",    "A", /* latin capital letter A with grave */
   193     "&Aacute;",	"&#193;",    "A", /* latin capital letter A with acute */
   194     "&Acirc;",	"&#194;",    "A", /* latin capital letter A with circumflex */
   195     "&Atilde;",	"&#195;",    "A", /* latin capital letter A with tilde */
   196     "&Auml;",	"&#196;",    "A", /* latin capital letter A with diaeresis */
   197     "&Aring;",	"&#197;",    "A", /* latin capital letter A with ring above */
   198     "&AElig;",	"&#198;",    "AE", /* latin capital letter AE */
   199     "&Ccedil;",	"&#199;",    "C", /* latin capital letter C with cedilla */
   200     "&Egrave;",	"&#200;",    "E", /* latin capital letter E with grave */
   201     "&Eacute;",	"&#201;",    "E", /* latin capital letter E with acute */
   202     "&Ecirc;",	"&#202;",    "E", /* latin capital letter E with circumflex */
   203     "&Euml;",	"&#203;",    "E", /* latin capital letter E with diaeresis */
   204     "&Igrave;",	"&#204;",    "I", /* latin capital letter I with grave */
   205     "&Iacute;",	"&#205;",    "I", /* latin capital letter I with acute */
   206     "&Icirc;",	"&#206;",    "I", /* latin capital letter I with circumflex */
   207     "&Iuml;",	"&#207;",    "I", /* latin capital letter I with diaeresis */
   208     "&ETH;",	"&#208;",    "E", /* latin capital letter ETH */
   209     "&Ntilde;",	"&#209;",    "N", /* latin capital letter N with tilde */
   210     "&Ograve;",	"&#210;",    "O", /* latin capital letter O with grave */
   211     "&Oacute;",	"&#211;",    "O", /* latin capital letter O with acute */
   212     "&Ocirc;",	"&#212;",    "O", /* latin capital letter O with circumflex */
   213     "&Otilde;",	"&#213;",    "O", /* latin capital letter O with tilde */
   214     "&Ouml;",	"&#214;",    "O", /* latin capital letter O with diaeresis */
   215     "&times;",	"&#215;",    "*", /* multiplication sign */
   216     "&Oslash;",	"&#216;",    "O", /* latin capital letter O with stroke */
   217     "&Ugrave;",	"&#217;",    "U", /* latin capital letter U with grave */
   218     "&Uacute;",	"&#218;",    "U", /* latin capital letter U with acute */
   219     "&Ucirc;",	"&#219;",    "U", /* latin capital letter U with circumflex */
   220     "&Uuml;",	"&#220;",    "U", /* latin capital letter U with diaeresis */
   221     "&Yacute;",	"&#221;",    "Y", /* latin capital letter Y with acute */
   222     "&THORN;",	"&#222;",    "TH", /* latin capital letter THORN */
   223     "&szlig;",	"&#223;",    "sz", /* latin small letter sharp s = ess-zed */
   224     "&agrave;",	"&#224;",    "a", /* latin small letter a with grave */
   225     "&aacute;",	"&#225;",    "a", /* latin small letter a with acute */
   226     "&acirc;",	"&#226;",    "a", /* latin small letter a with circumflex */
   227     "&atilde;",	"&#227;",    "a", /* latin small letter a with tilde */
   228     "&auml;",	"&#228;",    "a", /* latin small letter a with diaeresis */
   229     "&aring;",	"&#229;",    "a", /* latin small letter a with ring above */
   230     "&aelig;",	"&#230;",    "ae", /* latin small letter ae */
   231     "&ccedil;",	"&#231;",    "c", /* latin small letter c with cedilla */
   232     "&egrave;",	"&#232;",    "e", /* latin small letter e with grave */
   233     "&eacute;",	"&#233;",    "e", /* latin small letter e with acute */
   234     "&ecirc;",	"&#234;",    "e", /* latin small letter e with circumflex */
   235     "&euml;",	"&#235;",    "e", /* latin small letter e with diaeresis */
   236     "&igrave;",	"&#236;",    "i", /* latin small letter i with grave */
   237     "&iacute;",	"&#237;",    "i", /* latin small letter i with acute */
   238     "&icirc;",	"&#238;",    "i", /* latin small letter i with circumflex */
   239     "&iuml;",	"&#239;",    "i", /* latin small letter i with diaeresis */
   240     "&eth;",	"&#240;",    "eth", /* latin small letter eth */
   241     "&ntilde;",	"&#241;",    "n", /* latin small letter n with tilde */
   242     "&ograve;",	"&#242;",    "o", /* latin small letter o with grave */
   243     "&oacute;",	"&#243;",    "o", /* latin small letter o with acute */
   244     "&ocirc;",	"&#244;",    "o", /* latin small letter o with circumflex */
   245     "&otilde;",	"&#245;",    "o", /* latin small letter o with tilde */
   246     "&ouml;",	"&#246;",    "o", /* latin small letter o with diaeresis */
   247     "&divide;",	"&#247;",    "/", /* division sign */
   248     "&oslash;",	"&#248;",    "o", /* latin small letter o with stroke */
   249     "&ugrave;",	"&#249;",    "u", /* latin small letter u with grave */
   250     "&uacute;",	"&#250;",    "u", /* latin small letter u with acute */
   251     "&ucirc;",	"&#251;",    "u", /* latin small letter u with circumflex */
   252     "&uuml;",	"&#252;",    "u", /* latin small letter u with diaeresis */
   253     "&yacute;",	"&#253;",    "y", /* latin small letter y with acute */
   254     "&thorn;",	"&#254;",    "th", /* latin small letter thorn */
   255     "&yuml;",	"&#255;",    "y", /* latin small letter y with diaeresis */
   256     "", ""
   257 };
   258 
/*
 * special characters
 *
 * The first group is written as decimal ASCII codes, the second as
 * character constants.
 */
#define CHAR_SPACE        32    /* ' '  */
#define CHAR_TAB           9    /* '\t' */
#define CHAR_LF           10    /* '\n' */
#define CHAR_CR           13    /* '\r' */
#define CHAR_DQUOTE       34    /* '"'  */
#define CHAR_SQUOTE       39    /* '\'' */
#define CHAR_OPEN_SQUOTE  96    /* '`'  */
#define CHAR_TILDE       126    /* '~'  */
#define CHAR_ASTERISK     42    /* '*'  */
#define CHAR_FORESLASH    47    /* '/'  */
#define CHAR_CARAT        94    /* '^'  */

#define CHAR_UNDERSCORE    '_'
#define CHAR_OPEN_CBRACK   '{'
#define CHAR_CLOSE_CBRACK  '}'
#define CHAR_OPEN_RBRACK   '('
#define CHAR_CLOSE_RBRACK  ')'
#define CHAR_OPEN_SBRACK   '['
#define CHAR_CLOSE_SBRACK  ']'
   279 
/*
 * longest and shortest normal PG line lengths
 *
 * WAY_TOO_LONG is a second, higher limit; presumably it feeds the
 * "verylongline" tally kept by procfile() -- confirm where it is used.
 */
#define LONGEST_PG_LINE   75
#define WAY_TOO_LONG      80
#define SHORTEST_PG_LINE  55
   284 
/*
 * Command-line switch letters. main() compares each switch character,
 * upper-cased, against this string; the position of the matching letter is
 * the index set in pswit[]. The *_SWITCH index macros below must therefore
 * stay in the same order as the letters here.
 */
#define SWITCHES "ESTPXLOYHWVMUD" /* switches:-                            */
                                  /*     D - ignore DP-specific markup     */
                                  /*     E - echo queried line             */
                                  /*     S - check single quotes           */
                                  /*     T - check common typos            */
                                  /*     P - require closure of quotes on  */
                                  /*         every paragraph               */
                                  /*     X - "Trust no one" :-) Paranoid!  */
                                  /*         Queries everything            */
                                  /*     L - line end checking defaults on */
                                  /*         -L turns it off               */
                                  /*     O - overview. Just shows counts.  */
                                  /*     Y - puts errors to stdout         */
                                  /*         instead of stderr             */
                                  /*     H - Echoes header fields          */
                                  /*     M - Ignore markup in < >          */
                                  /*     U - Use file of User-defined Typos*/
                                  /*     W - Defaults for use on Web upload*/
                                  /*     V - Verbose - list EVERYTHING!    */
#define SWITNO 14                 /* max number of switch parms            */
                                  /*        - used for defining array-size */
#define MINARGS   1               /* minimum no of args excl switches      */
#define MAXARGS   1               /* maximum no of args excl switches      */

int pswit[SWITNO];                /* program switches set by SWITCHES      */

/* Indices into pswit[]; each equals the letter's position in SWITCHES. */
#define ECHO_SWITCH      0        /* E */
#define SQUOTE_SWITCH    1        /* S */
#define TYPO_SWITCH      2        /* T */
#define QPARA_SWITCH     3        /* P */
#define PARANOID_SWITCH  4        /* X */
#define LINE_END_SWITCH  5        /* L */
#define OVERVIEW_SWITCH  6        /* O */
#define STDOUT_SWITCH    7        /* Y */
#define HEADER_SWITCH    8        /* H */
#define WEB_SWITCH       9        /* W */
#define VERBOSE_SWITCH   10       /* V */
#define MARKUP_SWITCH    11       /* M */
#define USERTYPO_SWITCH  12       /* U */
#define DP_SWITCH        13       /* D */
   325 
/*
 * Query tallies. main() zeroes the cnt_* counters before calling
 * procfile() and prints the non-zero ones, plus a total, in overview mode.
 * cnt_spacend is zeroed too but is not part of the printed total.
 * linecnt and checked_linecnt are zeroed by procfile() itself.
 */
long cnt_dquot;       /* for overview mode, count of doublequote queries */
long cnt_squot;       /* for overview mode, count of singlequote queries */
long cnt_brack;       /* for overview mode, count of brackets queries */
long cnt_bin;         /* for overview mode, count of non-ASCII queries */
long cnt_odd;         /* for overview mode, count of odd character queries */
long cnt_long;        /* for overview mode, count of long line errors */
long cnt_short;       /* for overview mode, count of short line queries */
long cnt_punct;       /* for overview mode, count of punctuation and spacing queries */
long cnt_dash;        /* for overview mode, count of dash-related queries */
long cnt_word;        /* for overview mode, count of word queries */
long cnt_html;        /* for overview mode, count of html queries */
long cnt_lineend;     /* for overview mode, count of line-end queries */
long cnt_spacend;     /* count of lines with space at end */
long linecnt;         /* count of total lines in the file */
long checked_linecnt; /* count of lines actually checked */
   341 
/* Called by main() when the number of file arguments is wrong. */
void proghelp(void);
/* Processes the file named by its argument; called once by main(). */
void procfile(char *);

/*
 * Symbolic selector values. Their users are not visible in this part of
 * the file; the names suggest paired choices (which threshold, which end
 * of a word, which neighbour, which half of a pair).
 */
#define LOW_THRESHOLD    0
#define HIGH_THRESHOLD   1

#define START 0
#define END 1
#define PREV 0
#define NEXT 1
#define FIRST_OF_PAIR 0
#define SECOND_OF_PAIR 1

#define MAX_WORDPAIR 1000

/*
 * Directory the program was started from: main() copies argv[0] here and
 * truncates it after the last '/' or '\\'.
 */
char running_from[MAX_PATH];
   358 
/*
 * Prototypes for helper routines defined elsewhere in this file.
 * Only flgets() and lowerit() are called in the part of the file visible
 * here: main() reads the user typo file line by line with flgets(), and
 * procfile() lower-cases aline in place with lowerit().
 */
int mixdigit(char *);
char *getaword(char *,char *);
int matchword(char *,char *);
char *flgets(char *,int,FILE *,long);
void lowerit(char *);
int gcisalpha(unsigned char);
int gcisdigit(unsigned char);
int gcisletter(unsigned char);
char *gcstrchr(char *s,char c);
void postprocess_for_HTML(char *);
char *linehasmarkup(char *);
char *losemarkup(char *);
int tagcomp(char *,char *);
char *loseentities(char *);
int isroman(char *);
int usertypo_count;     /* number of entries in use in usertypo[] */
void postprocess_for_DP(char *);

/* Scratch buffer the size of one input line. */
char wrk[LINEBUFSIZE];

/*
 * Tables of up to MAX_QWORD words of at most MAX_QWORD_LENGTH bytes each,
 * with a parallel array of counts; all three are cleared at the start of
 * procfile(). NOTE(review): presumably used to avoid repeating queries for
 * the same word -- confirm against the code that fills them.
 */
#define MAX_QWORD 50
#define MAX_QWORD_LENGTH 40
char qword[MAX_QWORD][MAX_QWORD_LENGTH];
char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
signed int dupcnt[MAX_QWORD];
   384 
   385 int main(int argc,char **argv)
   386 {
   387     char *argsw,*s;
   388     int i,switno,invarg;
   389     char usertypo_file[MAX_PATH];
   390     FILE *usertypofile;
   391     if (strlen(argv[0])<sizeof(running_from))
   392 	/* save the path to the executable */
   393         strcpy(running_from,argv[0]);
   394     /* find out what directory we're running from */
   395     s=running_from+strlen(running_from);
   396     for (;*s!='/' && *s!='\\' && s>=running_from;s--)
   397         *s=0;
   398     switno=strlen(SWITCHES);
   399     for (i=switno;--i>0;)
   400         pswit[i]=0;           /* initialise switches */
   401     /*
   402      * Standard loop to extract switches.
   403      * When we come out of this loop, the arguments will be
   404      * in argv[0] upwards and the switches used will be
   405      * represented by their equivalent elements in pswit[]
   406      */
   407     while (--argc>0 && **++argv=='-')
   408         for (argsw=argv[0]+1;*argsw!='\0';argsw++)
   409             for (i=switno,invarg=1;(--i>=0) && invarg==1;)
   410                 if ((toupper(*argsw))==SWITCHES[i])
   411 		{
   412                     invarg=0;
   413                     pswit[i]=1;
   414 		}
   415     /* Paranoid checking is turned OFF, not on, by its switch */
   416     pswit[PARANOID_SWITCH]^=1;
   417     if (pswit[PARANOID_SWITCH])
   418 	/* if running in paranoid mode force typo checks as well   */
   419         pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1;
   420     /* Line-end checking is turned OFF, not on, by its switch */
   421     pswit[LINE_END_SWITCH]^=1;
   422     /* Echoing is turned OFF, not on, by its switch */
   423     pswit[ECHO_SWITCH]^=1;
   424     if (pswit[OVERVIEW_SWITCH])
   425 	/* just print summary; don't echo */
   426         pswit[ECHO_SWITCH]=0;
   427     /*
   428      * Web uploads - for the moment, this is really just a placeholder
   429      * until we decide what processing we really want to do on web uploads
   430      */
   431     if (pswit[WEB_SWITCH])
   432     {
   433 	/* specific override for web uploads */
   434         pswit[ECHO_SWITCH]=1;
   435         pswit[SQUOTE_SWITCH]=0;
   436         pswit[TYPO_SWITCH]=1;
   437         pswit[QPARA_SWITCH]=0;
   438         pswit[PARANOID_SWITCH]=1;
   439         pswit[LINE_END_SWITCH]=0;
   440         pswit[OVERVIEW_SWITCH]=0;
   441         pswit[STDOUT_SWITCH]=0;
   442         pswit[HEADER_SWITCH]=1;
   443         pswit[VERBOSE_SWITCH]=0;
   444         pswit[MARKUP_SWITCH]=0;
   445         pswit[USERTYPO_SWITCH]=0;
   446         pswit[DP_SWITCH]=0;
   447     }
   448     if (argc<MINARGS || argc>MAXARGS)
   449     {
   450 	/* check number of args */
   451         proghelp();
   452         return 1;
   453     }
   454     /* read in the user-defined stealth scanno list */
   455     if (pswit[USERTYPO_SWITCH])
   456     {
   457 	/* ... we were told we had one! */
   458         usertypofile=fopen(USERTYPO_FILE,"rb");
   459         if (!usertypofile)
   460 	{
   461 	    /* not in cwd. try excuteable directory. */
   462             strcpy(usertypo_file,running_from);
   463             strcat(usertypo_file,USERTYPO_FILE);
   464             usertypofile=fopen(usertypo_file,"rb");
   465             if (!usertypofile) {
   466 		/* we ain't got no user typo file! */
   467                 printf("   --> I couldn't find gutcheck.typ "
   468 		  "-- proceeding without user typos.\n");
   469 	    }
   470 	}
   471         usertypo_count=0;
   472         if (usertypofile)
   473 	{
   474 	    /* we managed to open a User Typo File! */
   475             if (pswit[USERTYPO_SWITCH])
   476 	    {
   477                 while (flgets(aline,LINEBUFSIZE-1,usertypofile,
   478 		  (long)usertypo_count))
   479 		{
   480                     if (strlen(aline)>1)
   481 		    {
   482                         if ((int)*aline>33)
   483 			{
   484                             s=malloc(strlen(aline)+1);
   485                             if (!s)
   486 			    {
   487                                 fprintf(stderr,"bookloupe: cannot get enough "
   488 				  "memory for user typo file!\n");
   489                                 exit(1);
   490 			    }
   491                             strcpy(s,aline);
   492                             usertypo[usertypo_count]=s;
   493                             usertypo_count++;
   494                             if (usertypo_count>=MAX_USER_TYPOS)
   495 			    {
   496                                 printf("   --> Only %d user-defined typos "
   497 				  "allowed: ignoring the rest\n");
   498                                 break;
   499 			    }
   500 			}
   501 		    }
   502 		}
   503 	    }
   504             fclose(usertypofile);
   505 	}
   506     }
   507     fprintf(stderr,"bookloupe: Check and report on an e-text\n");
   508     cnt_dquot=cnt_squot=cnt_brack=cnt_bin=cnt_odd=cnt_long=
   509     cnt_short=cnt_punct=cnt_dash=cnt_word=cnt_html=cnt_lineend=
   510     cnt_spacend=0;
   511     procfile(argv[0]);
   512     if (pswit[OVERVIEW_SWITCH])
   513     {
   514 	printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
   515 	  checked_linecnt,linecnt,linecnt-checked_linecnt);
   516         printf("    --------------- Queries found --------------\n");
   517         if (cnt_long)
   518 	    printf("    Long lines:                    %14ld\n",cnt_long);
   519         if (cnt_short)
   520 	    printf("    Short lines:                   %14ld\n",cnt_short);
   521         if (cnt_lineend)
   522 	    printf("    Line-end problems:             %14ld\n",cnt_lineend);
   523         if (cnt_word)
   524 	    printf("    Common typos:                  %14ld\n",cnt_word);
   525         if (cnt_dquot)
   526 	    printf("    Unmatched quotes:              %14ld\n",cnt_dquot);
   527         if (cnt_squot)
   528 	    printf("    Unmatched SingleQuotes:        %14ld\n",cnt_squot);
   529         if (cnt_brack)
   530 	    printf("    Unmatched brackets:            %14ld\n",cnt_brack);
   531         if (cnt_bin)
   532 	    printf("    Non-ASCII characters:          %14ld\n",cnt_bin);
   533         if (cnt_odd)
   534 	    printf("    Proofing characters:           %14ld\n",cnt_odd);
   535         if (cnt_punct)
   536 	    printf("    Punctuation & spacing queries: %14ld\n",cnt_punct);
   537         if (cnt_dash)
   538 	    printf("    Non-standard dashes:           %14ld\n",cnt_dash);
   539         if (cnt_html)
   540 	    printf("    Possible HTML tags:            %14ld\n",cnt_html);
   541         printf("\n");
   542         printf("    TOTAL QUERIES                  %14ld\n",
   543           cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
   544           cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
   545     }
   546     return 0;
   547 }
   548 
   549 /*
   550  * procfile:
   551  *
   552  * Process one file.
   553  */
   554 void procfile(char *filename)
   555 {
   556     char *s,*t,*s1,laststart,*wordstart;
   557     char inword[MAXWORDLEN],testword[MAXWORDLEN];
   558     char parastart[81];     /* first line of current para */
   559     FILE *infile;
   560     long quot,squot,firstline,alphalen,totlen,binlen,
   561       shortline,longline,verylongline,spacedash,emdash,
   562       space_emdash,non_PG_space_emdash,PG_space_emdash,
   563       footerline,dotcomma,start_para_line,astline,fslashline,
   564       standalone_digit,hyphens,htmcount,endquote_count;
   565     long spline,nspline;
   566     signed int i,j,llen,isemptyline,isacro,isellipsis,istypo,alower,
   567       eNon_A,eTab,eTilde,eAst,eFSlash,eCarat;
   568     signed int warn_short,warn_long,warn_bin,warn_dash,warn_dotcomma,
   569       warn_ast,warn_fslash,warn_digit,warn_hyphen,warn_endquote;
   570     unsigned int lastlen,lastblen;
   571     signed int s_brack,c_brack,r_brack,c_unders;
   572     signed int open_single_quote,close_single_quote,guessquote,dquotepar,
   573       squotepar;
   574     signed int isnewpara,vowel,consonant;
   575     char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],
   576       cbrack_err[80],unders_err[80];
   577     signed int qword_index,qperiod_index,isdup;
   578     signed int enddash;
   579     signed int Dutchcount,isDutch,Frenchcount,isFrench;
   580     laststart=CHAR_SPACE;
   581     lastlen=lastblen=0;
   582     *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=
   583       *unders_err=*prevline=0;
   584     linecnt=firstline=alphalen=totlen=binlen=
   585       shortline=longline=spacedash=emdash=checked_linecnt=
   586       space_emdash=non_PG_space_emdash=PG_space_emdash=
   587       footerline=dotcomma=start_para_line=astline=fslashline= 
   588       standalone_digit=hyphens=htmcount=endquote_count=0;
   589     quot=squot=s_brack=c_brack=r_brack=c_unders=0;
   590     i=llen=isemptyline=isacro=isellipsis=istypo=0;
   591     warn_short=warn_long=warn_bin=warn_dash=warn_dotcomma= 
   592       warn_ast=warn_fslash=warn_digit=warn_endquote=0;
   593     isnewpara=vowel=consonant=enddash=0;
   594     spline=nspline=0;
   595     qword_index=qperiod_index=isdup=0;
   596     *inword=*testword=0;
   597     open_single_quote=close_single_quote=guessquote=dquotepar=squotepar=0;
   598     Dutchcount=isDutch=Frenchcount=isFrench=0;
   599     for (j=0;j<MAX_QWORD;j++)
   600     {
   601         dupcnt[j]=0;
   602         for (i=0;i<MAX_QWORD_LENGTH;i++)
   603 	{
   604             qword[i][j]=0;
   605             qperiod[i][j]=0;
   606 	}
   607     }
   608     infile=fopen(filename,"rb");
   609     if (!infile)
   610     {
   611         if (pswit[STDOUT_SWITCH])
   612             fprintf(stdout,"bookloupe: cannot open %s\n",filename);
   613         else
   614             fprintf(stderr,"bookloupe: cannot open %s\n",filename);
   615 	exit(1);
   616     }
   617     fprintf(stdout,"\n\nFile: %s\n\n",filename);
   618     firstline=shortline=longline=verylongline=0;
   619     /*
   620      * Run a first pass - verify that it's a valid PG
   621      * file, decide whether to report some things that
   622      * occur many times in the text like long or short
   623      * lines, non-standard dashes, etc.
   624      */
   625     while (fgets(aline,LINEBUFSIZE-1,infile))
   626     {
   627         while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13)
   628 	    aline[strlen(aline)-1]=0;
   629         linecnt++;
   630         if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") &&
   631 	  (strstr(aline,"PUBLIC DOMAIN") || strstr(aline,"COPYRIGHT")))
   632 	{
   633             if (spline)
   634                 printf("   --> Duplicate header?\n");
   635             spline=linecnt+1;   /* first line of non-header text, that is */
   636 	}
   637         if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG"))
   638 	{
   639             if (nspline)
   640                 printf("   --> Duplicate header?\n");
   641             nspline=linecnt+1;   /* first line of non-header text, that is */
   642 	}
   643         if (spline || nspline)
   644 	{
   645             lowerit(aline);
   646             if (strstr(aline,"end") && strstr(aline,"project gutenberg"))
   647 	    {
   648                 if (strstr(aline,"end")<strstr(aline,"project gutenberg"))
   649 		{
   650                     if (footerline)
   651 		    {
   652 			/* it's an old-form header - we can detect duplicates */
   653                         if (!nspline)
   654                             printf("   --> Duplicate footer?\n");
   655 		    }
   656                     else
   657                         footerline=linecnt;
   658 		}
   659 	    }
   660 	}
   661         if (spline)
   662 	    firstline=spline;
   663         if (nspline)
   664 	    firstline=nspline;  /* override with new */
   665         if (footerline)
   666 	    continue;    /* don't count the boilerplate in the footer */
   667         llen=strlen(aline);
   668         totlen+=llen;
   669         for (i=0;i<llen;i++)
   670 	{
   671             if ((unsigned char)aline[i]>127)
   672 		binlen++;
   673             if (gcisalpha(aline[i]))
   674 		alphalen++;
   675             if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
   676 		endquote_count++;
   677 	}
   678         if (strlen(aline)>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE &&
   679 	  lastblen>2 && lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
   680 	    shortline++;
   681         if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE)
   682 	    cnt_spacend++;
   683         if (strstr(aline,".,"))
   684 	    dotcomma++;
   685         /* only count ast lines for ignoring purposes where there is */
   686         /* lower-case text on the line */
   687         if (strstr(aline,"*"))
   688 	{
   689             for (s=aline;*s;s++)
   690                 if (*s>='a' && *s<='z')
   691                     break;
   692              if (*s)
   693 		astline++;
   694 	}
   695         if (strstr(aline,"/"))
   696             fslashline++;
   697         for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
   698 	    ;
   699         if (aline[i]=='-' && aline[i-1]!='-')
   700 	    hyphens++;
   701         if (llen>LONGEST_PG_LINE)
   702 	    longline++;
   703         if (llen>WAY_TOO_LONG)
   704 	    verylongline++;
   705         if (strstr(aline,"<") && strstr(aline,">"))
   706 	{
   707             i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);
   708             if (i>0)
   709                 htmcount++;
   710             if (strstr(aline,"<i>"))
   711 		htmcount+=4; /* bonus marks! */
   712 	}
   713         /* Check for spaced em-dashes */
   714         if (strstr(aline,"--"))
   715 	{
   716             emdash++;
   717             if (*(strstr(aline,"--")-1)==CHAR_SPACE ||
   718                (*(strstr(aline,"--")+2)==CHAR_SPACE))
   719 		space_emdash++;
   720             if (*(strstr(aline,"--")-1)==CHAR_SPACE &&
   721                (*(strstr(aline,"--")+2)==CHAR_SPACE))
   722 		/* count of em-dashes with spaces both sides */
   723 		non_PG_space_emdash++;
   724             if (*(strstr(aline,"--")-1)!=CHAR_SPACE &&
   725                (*(strstr(aline,"--")+2)!=CHAR_SPACE))
   726 		/* count of PG-type em-dashes with no spaces */
   727 		PG_space_emdash++;
   728 	}
   729         for (s=aline;*s;)
   730 	{
   731             s=getaword(s,inword);
   732             if (!strcmp(inword,"hij") || !strcmp(inword,"niet")) 
   733                 Dutchcount++;
   734             if (!strcmp(inword,"dans") || !strcmp(inword,"avec")) 
   735                 Frenchcount++;
   736             if (!strcmp(inword,"0") || !strcmp(inword,"1")) 
   737                 standalone_digit++;
   738 	}
   739         /* Check for spaced dashes */
   740         if (strstr(aline," -") && *(strstr(aline," -")+2)!='-')
   741 	    spacedash++;
   742         lastblen=lastlen;
   743         lastlen=strlen(aline);
   744         laststart=aline[0];
   745     }
   746     fclose(infile);
   747     /* now, based on this quick view, make some snap decisions */
   748     if (cnt_spacend>0)
   749         printf("   --> %ld lines in this file have white space at end\n",
   750 	  cnt_spacend);
   751     warn_dotcomma=1;
   752     if (dotcomma>5)
   753     {
   754         warn_dotcomma=0;
   755         printf("   --> %ld lines in this file contain '.,'. "
   756 	  "Not reporting them.\n",dotcomma);
   757     }
   758     /* if more than 50 lines, or one-tenth, are short,
   759      * don't bother reporting them */
   760     warn_short=1;
   761     if (shortline>50 || shortline*10>linecnt)
   762     {
   763         warn_short=0;
   764         printf("   --> %ld lines in this file are short. "
   765 	  "Not reporting short lines.\n",shortline);
   766     }
   767     /*
   768      * If more than 50 lines, or one-tenth, are long,
   769      * don't bother reporting them.
   770      */
   771     warn_long=1;
   772     if (longline>50 || longline*10>linecnt)
   773     {
   774         warn_long=0;
   775         printf("   --> %ld lines in this file are long. "
   776 	  "Not reporting long lines.\n",longline);
   777     }
   778     /* If more than 10 lines contain asterisks, don't bother reporting them. */
   779     warn_ast=1;
   780     if (astline>10)
   781     {
   782         warn_ast=0;
   783         printf("   --> %ld lines in this file contain asterisks. "
   784 	  "Not reporting them.\n",astline);
   785     }
   786     /*
   787      * If more than 10 lines contain forward slashes,
   788      * don't bother reporting them.
   789      */
   790     warn_fslash=1;
   791     if (fslashline>10)
   792     {
   793         warn_fslash=0;
   794         printf("   --> %ld lines in this file contain forward slashes. "
   795 	  "Not reporting them.\n",fslashline);
   796     }
   797     /*
   798      * If more than 20 lines contain unpunctuated endquotes,
   799      * don't bother reporting them.
   800      */
   801     warn_endquote=1;
   802     if (endquote_count>20)
   803     {
   804         warn_endquote=0;
   805         printf("   --> %ld lines in this file contain unpunctuated endquotes. "
   806 	  "Not reporting them.\n",endquote_count);
   807     }
   808     /*
   809      * If more than 10 lines contain standalone digits,
   810      * don't bother reporting them.
   811      */
   812     warn_digit=1;
   813     if (standalone_digit>10)
   814     {
   815         warn_digit=0;
   816         printf("   --> %ld lines in this file contain standalone 0s and 1s. "
   817 	  "Not reporting them.\n",standalone_digit);
   818     }
   819     /*
   820      * If more than 20 lines contain hyphens at end,
   821      * don't bother reporting them.
   822      */
   823     warn_hyphen=1;
   824     if (hyphens>20)
   825     {
   826         warn_hyphen=0;
   827         printf("   --> %ld lines in this file have hyphens at end. "
   828 	  "Not reporting them.\n",hyphens);
   829     }
   830     if (htmcount>20 && !pswit[MARKUP_SWITCH])
   831     {
   832         printf("   --> Looks like this is HTML. Switching HTML mode ON.\n");
   833         pswit[MARKUP_SWITCH]=1;
   834     }
   835     if (verylongline>0)
   836         printf("   --> %ld lines in this file are VERY long!\n",verylongline);
   837     /*
   838      * If there are more non-PG spaced dashes than PG em-dashes,
   839      * assume it's deliberate.
   840      * Current PG guidelines say don't use them, but older texts do,
   841      * and some people insist on them whatever the guidelines say.
   842      */
   843     warn_dash=1;
   844     if (spacedash+non_PG_space_emdash>PG_space_emdash)
   845     {
   846         warn_dash=0;
   847         printf("   --> There are %ld spaced dashes and em-dashes. "
   848 	  "Not reporting them.\n",spacedash+non_PG_space_emdash);
   849     }
   850     /* If more than a quarter of characters are hi-bit, bug out. */
   851     warn_bin=1;
   852     if (binlen*4>totlen)
   853     {
   854         printf("   --> This file does not appear to be ASCII. "
   855 	  "Terminating. Best of luck with it!\n");
   856         exit(1);
   857     }
   858     if (alphalen*4<totlen)
   859     {
   860         printf("   --> This file does not appear to be text. "
   861 	  "Terminating. Best of luck with it!\n");
   862         exit(1);
   863     }
   864     if (binlen*100>totlen || binlen>100)
   865     {
   866         printf("   --> There are a lot of foreign letters here. "
   867 	  "Not reporting them.\n");
   868         warn_bin=0;
   869     }
   870     isDutch=0;
   871     if (Dutchcount>50)
   872     {
   873         isDutch=1;
   874         printf("   --> This looks like Dutch - "
   875 	  "switching off dashes and warnings for 's Middags case.\n");
   876     }
   877     isFrench=0;
   878     if (Frenchcount>50)
   879     {
   880         isFrench=1;
   881         printf("   --> This looks like French - "
   882 	  "switching off some doublepunct.\n");
   883     }
   884     if (firstline && footerline)
   885         printf("    The PG header and footer appear to be already on.\n");
   886     else
   887     {
   888         if (firstline)
   889             printf("    The PG header is on - no footer.\n");
   890         if (footerline)
   891             printf("    The PG footer is on - no header.\n");
   892     }
   893     printf("\n");
   894     if (pswit[VERBOSE_SWITCH])
   895     {
   896         warn_bin=1;
   897         warn_short=1;
   898         warn_dotcomma=1;
   899         warn_long=1;
   900         warn_dash=1;
   901         warn_digit=1;
   902         warn_ast=1;
   903         warn_fslash=1;
   904         warn_hyphen=1;
   905         warn_endquote=1;
   906         printf("   *** Verbose output is ON -- you asked for it! ***\n");
   907     }
   908     if (isDutch)
   909         warn_dash=0;
   910     infile=fopen(filename,"rb");
   911     if (!infile)
   912     {
   913         if (pswit[STDOUT_SWITCH])
   914             fprintf(stdout,"bookloupe: cannot open %s\n",filename);
   915         else
   916             fprintf(stderr,"bookloupe: cannot open %s\n",filename);
   917 	exit(1);
   918     }
   919     if (footerline>0 && firstline>0 && footerline>firstline &&
   920       footerline-firstline<100)
   921     {
   922         printf("   --> I don't really know where this text starts. \n");
   923         printf("       There are no reference points.\n");
   924         printf("       I'm going to have to report the header and footer "
   925 	  "as well.\n");
   926         firstline=0;
   927     }
   928     /*
   929      * Here we go with the main pass. Hold onto yer hat!
   930      * Re-init some variables we've dirtied.
   931      */
   932     quot=squot=linecnt=0;
   933     laststart=CHAR_SPACE;
   934     lastlen=lastblen=0;
   935     while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
   936     {
   937         linecnt++;
   938         if (linecnt==1)
   939 	    isnewpara=1;
   940         if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11))
   941 	    continue;    // skip DP page separators completely
   942         if (linecnt<firstline || (footerline>0 && linecnt>footerline))
   943 	{
   944             if (pswit[HEADER_SWITCH])
   945 	    {
   946                 if (!strncmp(aline,"Title:",6))
   947                     printf("    %s\n",aline);
   948                 if (!strncmp(aline,"Author:",7))
   949                     printf("    %s\n",aline);
   950                 if (!strncmp(aline,"Release Date:",13))
   951                     printf("    %s\n",aline);
   952                 if (!strncmp(aline,"Edition:",8))
   953                     printf("    %s\n\n",aline);
   954 	    }
   955             continue;                /* skip through the header */
   956 	}
   957         checked_linecnt++;
   958         s=aline;
   959         isemptyline=1;    /* assume the line is empty until proven otherwise */
   960         /*
   961 	 * If we are in a state of unbalanced quotes, and this line
   962          * doesn't begin with a quote, output the stored error message.
   963          * If the -P switch was used, print the warning even if the
   964          * new para starts with quotes.
   965 	 */
   966         t=s;
   967         while (*t==' ')
   968 	    t++;
   969         if (*dquote_err)
   970             if (*t!=CHAR_DQUOTE || pswit[QPARA_SWITCH])
   971 	    {
   972                 if (!pswit[OVERVIEW_SWITCH])
   973 		{
   974                     if (pswit[ECHO_SWITCH])
   975 			printf("\n%s\n",parastart);
   976                     printf(dquote_err);
   977 		}
   978                 else
   979                     cnt_dquot++;
   980             }
   981         if (*squote_err)
   982 	{
   983             if (*t!=CHAR_SQUOTE && *t!=CHAR_OPEN_SQUOTE ||
   984 	      pswit[QPARA_SWITCH] || squot)
   985 	    {
   986                 if (!pswit[OVERVIEW_SWITCH])
   987 		{
   988                     if (pswit[ECHO_SWITCH])
   989 			printf("\n%s\n",parastart);
   990                     printf(squote_err);
   991 		}
   992                 else
   993                     cnt_squot++;
   994 	    }
   995             squot=0;
   996 	}
   997         if (*rbrack_err)
   998 	{
   999             if (!pswit[OVERVIEW_SWITCH])
  1000 	    {
  1001                 if (pswit[ECHO_SWITCH])
  1002 		    printf("\n%s\n",parastart);
  1003                 printf(rbrack_err);
  1004 	    }
  1005             else
  1006                 cnt_brack++;
  1007 	}
  1008         if (*sbrack_err)
  1009 	{
  1010             if (!pswit[OVERVIEW_SWITCH])
  1011 	    {
  1012                 if (pswit[ECHO_SWITCH])
  1013 		    printf("\n%s\n",parastart);
  1014                 printf(sbrack_err);
  1015 	    }
  1016             else
  1017                 cnt_brack++;
  1018 	}
  1019         if (*cbrack_err)
  1020 	{
  1021             if (!pswit[OVERVIEW_SWITCH])
  1022 	    {
  1023                 if (pswit[ECHO_SWITCH])
  1024 		    printf("\n%s\n",parastart);
  1025                 printf(cbrack_err);
  1026 	    }
  1027             else
  1028                 cnt_brack++;
  1029 	}
  1030         if (*unders_err)
  1031 	{
  1032             if (!pswit[OVERVIEW_SWITCH])
  1033 	    {
  1034                 if (pswit[ECHO_SWITCH])
  1035 		    printf("\n%s\n",parastart);
  1036                 printf(unders_err);
  1037 	    }
  1038             else
  1039                 cnt_brack++;
  1040 	}
  1041         *dquote_err=*squote_err=*rbrack_err=*cbrack_err= 
  1042 	  *sbrack_err=*unders_err=0;
  1043 	/*
  1044          * Look along the line, accumulate the count of quotes, and see
  1045          * if this is an empty line - i.e. a line with nothing on it
  1046          * but spaces.
  1047          * If line has just spaces, period, * and/or - on it, don't
  1048          * count it, since empty lines with asterisks or dashes to
  1049          * separate sections are common.
  1050 	 */
  1051         s=aline;
  1052         while (*s)
  1053 	{
  1054             if (*s==CHAR_DQUOTE)
  1055 		quot++;
  1056             if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)
  1057 	    {
  1058                 if (s==aline)
  1059 		{
  1060 		    /*
  1061 		     * At start of line, it can only be an openquote.
  1062 		     * Hardcode a very common exception!
  1063 		     */
  1064                     if (strncmp(s+2,"tis",3) && strncmp(s+2,"Tis",3))
  1065                         open_single_quote++;
  1066 		}
  1067                 else if (gcisalpha(*(s-1)) && gcisalpha(*(s+1)))
  1068 		    /* Do nothing! it's definitely an apostrophe, not a quote */
  1069 		    ;
  1070 		/* it's outside a word - let's check it out */
  1071 		else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(*(s+1)))
  1072 		{
  1073 		    /* it damwell better BE an openquote */
  1074 		    if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
  1075 			/* hardcode a very common exception! */
  1076 			open_single_quote++;
  1077 		}
  1078 		else
  1079 		{
  1080 		    /* now - is it a closequote? */
  1081 		    guessquote=0;   /* accumulate clues */
  1082 		    if (gcisalpha(s[-1]))
  1083 		    {
  1084 			/* it follows a letter - could be either */
  1085 			guessquote+=1;
  1086 			if (s[-1]=='s')
  1087 			{
  1088 			    /* looks like a plural apostrophe */
  1089 			    guessquote-=3;
  1090 			    if (s[1]==CHAR_SPACE)  /* bonus marks! */
  1091 				guessquote-=2;
  1092 			}
  1093 		    }
  1094 		    /* it doesn't have a letter either side */
  1095 		    else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))
  1096 			guessquote+=8; /* looks like a closequote */
  1097 		    else
  1098 			guessquote++;
  1099 		    if (open_single_quote>close_single_quote)
  1100 			/*
  1101 			 * Give it the benefit of some doubt,
  1102 			 * if a squote is already open.
  1103 			 */
  1104 			guessquote++;
  1105 		    else
  1106 			guessquote--;
  1107 		    if (guessquote>=0)
  1108 			close_single_quote++;
  1109 		}
  1110 	    }
  1111 	    if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
  1112 	      *s!=13 && *s!=10)
  1113 		isemptyline=0;  /* ignore lines like  *  *  *  as spacers */
  1114 	    if (*s==CHAR_UNDERSCORE)
  1115 		c_unders++;
  1116 	    if (*s==CHAR_OPEN_CBRACK)
  1117 		c_brack++;
  1118 	    if (*s==CHAR_CLOSE_CBRACK)
  1119 		c_brack--;
  1120 	    if (*s==CHAR_OPEN_RBRACK)
  1121 		r_brack++;
  1122 	    if (*s==CHAR_CLOSE_RBRACK)
  1123 		r_brack--;
  1124 	    if (*s==CHAR_OPEN_SBRACK)
  1125 		s_brack++;
  1126 	    if (*s==CHAR_CLOSE_SBRACK)
  1127 		s_brack--;
  1128 	    s++;
  1129 	}
  1130         if (isnewpara && !isemptyline)
  1131 	{
  1132 	    /* This line is the start of a new paragraph. */
  1133             start_para_line=linecnt;
  1134 	    /* Capture its first line in case we want to report it later. */
  1135             strncpy(parastart,aline,80);
  1136             parastart[79]=0;
  1137             dquotepar=squotepar=0; /* restart the quote count */
  1138             s=aline;
  1139             while (!gcisalpha(*s) && !gcisdigit(*s) && *s)
  1140 		s++;
  1141             if (*s>='a' && *s<='z')
  1142 	    {
  1143 		/* and its first letter is lowercase */
  1144                 if (pswit[ECHO_SWITCH])
  1145 		    printf("\n%s\n",aline);
  1146                 if (!pswit[OVERVIEW_SWITCH])
  1147                     printf("    Line %ld column %d - "
  1148 		      "Paragraph starts with lower-case\n",
  1149 		      linecnt,(int)(s-aline)+1);
  1150                 else
  1151                     cnt_punct++;
  1152 	    }
  1153             isnewpara=0; /* Signal the end of new para processing. */
  1154 	}
  1155         /* Check for an em-dash broken at line end. */
  1156         if (enddash && *aline=='-')
  1157 	{
  1158             if (pswit[ECHO_SWITCH])
  1159 		printf("\n%s\n",aline);
  1160             if (!pswit[OVERVIEW_SWITCH])
  1161                 printf("    Line %ld column 1 - Broken em-dash?\n",linecnt);
  1162             else
  1163                 cnt_punct++;
  1164 	}
  1165         enddash=0;
  1166         for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--)
  1167 	    ;
  1168         if (s>=aline && *s=='-')
  1169             enddash=1;
  1170 	/*
  1171          * Check for invalid or questionable characters in the line
  1172          * Anything above 127 is invalid for plain ASCII, and
  1173          * non-printable control characters should also be flagged.
  1174          * Tabs should generally not be there.
  1175 	 */
  1176         for (s=aline;*s;s++)
  1177 	{
  1178             i=(unsigned char)*s;
  1179             if (i<CHAR_SPACE && i!=CHAR_LF && i!=CHAR_CR && i!=CHAR_TAB)
  1180 	    {
  1181                 if (pswit[ECHO_SWITCH])
  1182 		    printf("\n%s\n",aline);
  1183                 if (!pswit[OVERVIEW_SWITCH])
  1184                     printf("    Line %ld column %d - Control character %d\n",
  1185 		      linecnt,(int)(s-aline)+1,i);
  1186                 else
  1187                     cnt_bin++;
  1188 	    }
  1189 	}
  1190         if (warn_bin)
  1191 	{
  1192 	    /* Don't repeat multiple warnings on one line. */
  1193             eNon_A=eTab=eTilde=eCarat=eFSlash=eAst=0;
  1194             for (s=aline;*s;s++)
  1195 	    {
  1196                 if (!eNon_A &&
  1197 		  (*s<CHAR_SPACE && *s!=9 && *s!='\n' || (unsigned char)*s>127))
  1198 		{
  1199                     i=*s;  /* annoying kludge for signed chars */
  1200                     if (i<0)
  1201 			i+=256;
  1202                     if (pswit[ECHO_SWITCH])
  1203 			printf("\n%s\n",aline);
  1204                     if (!pswit[OVERVIEW_SWITCH])
  1205                         if (i>127 && i<160)
  1206                             printf("    Line %ld column %d - "
  1207 			      "Non-ISO-8859 character %d\n",
  1208 			      linecnt,(int)(s-aline)+1,i);
  1209                         else
  1210                             printf("    Line %ld column %d - "
  1211 			      "Non-ASCII character %d\n",
  1212 			      linecnt,(int)(s-aline)+1,i);
  1213                     else
  1214                         cnt_bin++;
  1215                     eNon_A=1;
  1216 		}
  1217                 if (!eTab && *s==CHAR_TAB)
  1218 		{
  1219                     if (pswit[ECHO_SWITCH])
  1220 			printf("\n%s\n",aline);
  1221                     if (!pswit[OVERVIEW_SWITCH])
  1222                         printf("    Line %ld column %d - Tab character?\n",
  1223 			  linecnt,(int)(s-aline)+1);
  1224                     else
  1225                         cnt_odd++;
  1226                     eTab=1;
  1227 		}
  1228                 if (!eTilde && *s==CHAR_TILDE)
  1229 		{
  1230 		    /*
  1231 		     * Often used by OCR software to indicate an
  1232 		     * unrecognizable character.
  1233 		     */
  1234                     if (pswit[ECHO_SWITCH])
  1235 			printf("\n%s\n",aline);
  1236                     if (!pswit[OVERVIEW_SWITCH])
  1237                         printf("    Line %ld column %d - Tilde character?\n",
  1238 			  linecnt,(int)(s-aline)+1);
  1239                     else
  1240                         cnt_odd++;
  1241                     eTilde=1;
  1242 		}
  1243                 if (!eCarat && *s==CHAR_CARAT)
  1244 		{  
  1245                     if (pswit[ECHO_SWITCH])
  1246 			printf("\n%s\n",aline);
  1247                     if (!pswit[OVERVIEW_SWITCH])
  1248                         printf("    Line %ld column %d - Carat character?\n",
  1249 			  linecnt,(int)(s-aline)+1);
  1250                     else
  1251                         cnt_odd++;
  1252                     eCarat=1;
  1253 		}
  1254                 if (!eFSlash && *s==CHAR_FORESLASH && warn_fslash)
  1255 		{  
  1256                     if (pswit[ECHO_SWITCH])
  1257 			printf("\n%s\n",aline);
  1258                     if (!pswit[OVERVIEW_SWITCH])
  1259                         printf("    Line %ld column %d - Forward slash?\n",
  1260 			  linecnt,(int)(s-aline)+1);
  1261                     else
  1262                         cnt_odd++;
  1263                     eFSlash=1;
  1264 		}
  1265                 /*
  1266 		 * Report asterisks only in paranoid mode,
  1267 		 * since they're often deliberate.
  1268 		 */
  1269                 if (!eAst && pswit[PARANOID_SWITCH] && warn_ast &&
  1270 		  !isemptyline && *s==CHAR_ASTERISK)
  1271 		{
  1272                     if (pswit[ECHO_SWITCH])
  1273 			printf("\n%s\n",aline);
  1274                     if (!pswit[OVERVIEW_SWITCH])
  1275                         printf("    Line %ld column %d - Asterisk?\n",
  1276 			  linecnt,(int)(s-aline)+1);
  1277                     else
  1278                         cnt_odd++;
  1279                     eAst=1;
  1280 		}
  1281 	    }
  1282 	}
  1283         /* Check for line too long. */
  1284         if (warn_long)
  1285 	{
  1286             if (strlen(aline)>LONGEST_PG_LINE)
  1287 	    {
  1288                 if (pswit[ECHO_SWITCH])
  1289 		    printf("\n%s\n",aline);
  1290                 if (!pswit[OVERVIEW_SWITCH])
  1291                     printf("    Line %ld column %d - Long line %d\n",
  1292 		      linecnt,strlen(aline),strlen(aline));
  1293                 else
  1294                     cnt_long++;
  1295 	    }
  1296 	}
  1297         /*
  1298 	 * Check for line too short.
  1299          * This one is a bit trickier to implement: we don't want to
  1300          * flag the last line of a paragraph for being short, so we
  1301          * have to wait until we know that our current line is a
  1302          * "normal" line, then report the _previous_ line if it was too
  1303          * short. We also don't want to report indented lines like
  1304          * chapter heads or formatted quotations. We therefore keep
  1305          * lastlen as the length of the last line examined, and
  1306          * lastblen as the length of the last but one, and try to
  1307          * suppress unnecessary warnings by checking that both were of
  1308          * "normal" length. We keep the first character of the last
  1309          * line in laststart, and if it was a space, we assume that the
  1310          * formatting is deliberate. I can't figure out a way to
  1311          * distinguish something like a quoted verse left-aligned or
  1312          * the header or footer of a letter from a paragraph of short
  1313          * lines - maybe if I examined the whole paragraph, and if the
  1314          * para has less than, say, 8 lines and if all lines are short,
  1315          * then just assume it's OK? Need to look at some texts to see
  1316          * how often a formula like this would get the right result.
  1317 	 */
  1318         if (warn_short && strlen(aline)>1 && lastlen>1 &&
  1319 	  lastlen<SHORTEST_PG_LINE && lastblen>1 && lastblen>SHORTEST_PG_LINE &&
  1320 	  laststart!=CHAR_SPACE)
  1321 	{
  1322 	    if (pswit[ECHO_SWITCH])
  1323 		printf("\n%s\n",prevline);
  1324 	    if (!pswit[OVERVIEW_SWITCH])
  1325 		printf("    Line %ld column %d - Short line %d?\n",
  1326 		  linecnt-1,strlen(prevline),strlen(prevline));
  1327 	    else
  1328 		cnt_short++;
  1329 	}
  1330         lastblen=lastlen;
  1331         lastlen=strlen(aline);
  1332         laststart=aline[0];
  1333         /* Look for punctuation other than full ellipses at start of line. */
  1334         if (*aline && strchr(".?!,;:",aline[0]) && strncmp(". . .",aline,5))
  1335 	{
  1336 	    if (pswit[ECHO_SWITCH])
  1337 		printf("\n%s\n",aline);
  1338 	    if (!pswit[OVERVIEW_SWITCH])
  1339 		printf("    Line %ld column 1 - Begins with punctuation?\n",
  1340 		  linecnt);
  1341 	    else
  1342 		cnt_punct++;
  1343 	}
  1344         /*
  1345 	 * Check for spaced em-dashes.
  1346          * We must check _all_ occurrences of "--" on the line
  1347          * hence the loop - even if the first double-dash is OK
  1348          * there may be another that's wrong later on.
  1349 	 */
  1350         if (warn_dash)
  1351 	{
  1352             s=aline;
  1353             while (strstr(s,"--"))
  1354 	    {
  1355                 if (*(strstr(s,"--")-1)==CHAR_SPACE ||
  1356                    (*(strstr(s,"--")+2)==CHAR_SPACE))
  1357 		{
  1358                     if (pswit[ECHO_SWITCH])
  1359 			printf("\n%s\n",aline);
  1360                     if (!pswit[OVERVIEW_SWITCH])
  1361                         printf("    Line %ld column %d - Spaced em-dash?\n",
  1362 			  linecnt,(int)(strstr(s,"--")-aline)+1);
  1363                     else
  1364                         cnt_dash++;
  1365 		}
  1366                 s=strstr(s,"--")+2;
  1367 	    }
  1368 	}
  1369         /* Check for spaced dashes. */
  1370         if (warn_dash)
  1371 	{
  1372             if (strstr(aline," -"))
  1373 	    {
  1374                 if (*(strstr(aline," -")+2)!='-')
  1375 		{
  1376                     if (pswit[ECHO_SWITCH])
  1377 			printf("\n%s\n",aline);
  1378                     if (!pswit[OVERVIEW_SWITCH])
  1379                         printf("    Line %ld column %d - Spaced dash?\n",
  1380 			  linecnt,(int)(strstr(aline," -")-aline)+1);
  1381                     else
  1382                         cnt_dash++;
  1383 		}
  1384 	    }
  1385             else if (strstr(aline,"- "))
  1386 	    {
  1387 		if (*(strstr(aline,"- ")-1)!='-')
  1388 		{
  1389 		    if (pswit[ECHO_SWITCH])
  1390 			printf("\n%s\n",aline);
  1391 		    if (!pswit[OVERVIEW_SWITCH])
  1392 			printf("    Line %ld column %d - Spaced dash?\n",
  1393 			  linecnt,(int)(strstr(aline,"- ")-aline)+1);
  1394 		    else
  1395 			cnt_dash++;
  1396 		}
  1397 	    }
  1398 	}
  1399         /*
  1400 	 * Check for unmarked paragraphs indicated by separate speakers.
  1401          * May well be false positive:
  1402          * "Bravo!" "Wonderful!" called the crowd.
  1403          * but useful all the same.
  1404 	 */
  1405         s=wrk;
  1406         *s=0;
  1407         if (strstr(aline,"\" \""))
  1408 	    s=strstr(aline,"\" \"");
  1409         if (strstr(aline,"\"  \""))
  1410 	    s=strstr(aline,"\"  \"");
  1411         if (*s)
  1412 	{
  1413             if (pswit[ECHO_SWITCH])
  1414 		printf("\n%s\n",aline);
  1415             if (!pswit[OVERVIEW_SWITCH])
  1416                 printf("    Line %ld column %d - "
  1417 		  "Query missing paragraph break?\n",
  1418 		  linecnt,(int)(s-aline)+1);
  1419             else
  1420                 cnt_punct++;
  1421 	}
  1422         /*
  1423 	 * Check for "to he" and other easy he/be errors.
  1424          * This is a very inadequate effort on the he/be problem,
  1425          * but the phrase "to he" is always an error, whereas "to
  1426          * be" is quite common.
  1427          * Similarly, '"Quiet!", be said.' is a non-be error
  1428          * "to he" is _not_ always an error!:
  1429          *       "Where they went to he couldn't say."
  1430          * Another false positive:
  1431          *       What would "Cinderella" be without the . . .
  1432          * and another: "If he wants to he can see for himself."
  1433 	 */
  1434         s=wrk;
  1435         *s=0;
  1436         if (strstr(aline," to he "))
  1437 	    s=strstr(aline," to he ");
  1438         if (strstr(aline,"\" be "))
  1439 	    s=strstr(aline,"\" be ");
  1440         if (strstr(aline,"\", be "))
  1441 	    s=strstr(aline,"\", be ");
  1442         if (strstr(aline," is be "))
  1443 	    s=strstr(aline," is be ");
  1444         if (strstr(aline," be is "))
  1445 	    s=strstr(aline," be is ");
  1446         if (strstr(aline," was be "))
  1447 	    s=strstr(aline," was be ");
  1448         if (strstr(aline," be would "))
  1449 	    s=strstr(aline," be would ");
  1450         if (strstr(aline," be could "))
  1451 	    s=strstr(aline," be could ");
  1452         if (*s)
  1453 	{
  1454             if (pswit[ECHO_SWITCH])
  1455 		printf("\n%s\n",aline);
  1456             if (!pswit[OVERVIEW_SWITCH])
  1457                 printf("    Line %ld column %d - Query he/be error?\n",
  1458 		  linecnt,(int)(s-aline)+1);
  1459             else
  1460                 cnt_word++;
  1461 	}
  1462         s=wrk;
  1463         *s=0;
  1464         if (strstr(aline," i bad "))
  1465 	    s=strstr(aline," i bad ");
  1466         if (strstr(aline," you bad "))
  1467 	    s=strstr(aline," you bad ");
  1468         if (strstr(aline," he bad "))
  1469 	    s=strstr(aline," he bad ");
  1470         if (strstr(aline," she bad "))
  1471 	    s=strstr(aline," she bad ");
  1472         if (strstr(aline," they bad "))
  1473 	    s=strstr(aline," they bad ");
  1474         if (strstr(aline," a had "))
  1475 	    s=strstr(aline," a had ");
  1476         if (strstr(aline," the had "))
  1477 	    s=strstr(aline," the had ");
  1478         if (*s)
  1479 	{
  1480             if (pswit[ECHO_SWITCH])
  1481 		printf("\n%s\n",aline);
  1482             if (!pswit[OVERVIEW_SWITCH])
  1483                 printf("    Line %ld column %d - Query had/bad error?\n",
  1484 		  linecnt,(int)(s-aline)+1);
  1485             else
  1486                 cnt_word++;
  1487 	}
  1488         s=wrk;
  1489         *s=0;
  1490         if (strstr(aline,", hut "))
  1491 	    s=strstr(aline,", hut ");
  1492         if (strstr(aline,"; hut "))
  1493 	    s=strstr(aline,"; hut ");
  1494         if (*s)
  1495 	{
  1496             if (pswit[ECHO_SWITCH])
  1497 		printf("\n%s\n",aline);
  1498             if (!pswit[OVERVIEW_SWITCH])
  1499                 printf("    Line %ld column %d - Query hut/but error?\n",
  1500 		  linecnt,(int)(s-aline)+1);
  1501             else
  1502                 cnt_word++;
  1503 	}
  1504         /*
  1505 	 * Special case - angled bracket in front of "From" placed there by an
  1506 	 * MTA when sending an e-mail.
  1507 	 */
  1508         if (strstr(aline,">From"))
  1509 	{
  1510             if (pswit[ECHO_SWITCH])
  1511 		printf("\n%s\n",aline);
  1512             if (!pswit[OVERVIEW_SWITCH])
  1513                 printf("    Line %ld column %d - "
  1514 		  "Query angled bracket with From\n",
  1515 		  linecnt,(int)(strstr(aline,">From")-aline)+1);
  1516             else
  1517                 cnt_punct++;
  1518 	}
  1519         /*
  1520 	 * Check for a single character line -
  1521 	 * often an overflow from bad wrapping.
  1522 	 */
  1523         if (*aline && !aline[1])
  1524 	{
  1525             if (*aline=='I' || *aline=='V' || *aline=='X' || *aline=='L' ||
  1526 	      gcisdigit(*aline))
  1527                 ; /* Nothing - ignore numerals alone on a line. */
  1528             else
  1529 	    {
  1530                 if (pswit[ECHO_SWITCH])
  1531 		    printf("\n%s\n",aline);
  1532                 if (!pswit[OVERVIEW_SWITCH])
  1533                     printf("    Line %ld column 1 - "
  1534 		      "Query single character line\n",linecnt);
  1535                 else
  1536                     cnt_punct++;
  1537 	    }
  1538 	}
  1539         /* Check for I" - often should be ! */
  1540         if (strstr(aline," I\""))
  1541 	{
  1542             if (pswit[ECHO_SWITCH])
  1543 		printf("\n%s\n",aline);
  1544             if (!pswit[OVERVIEW_SWITCH])
  1545                 printf("    Line %ld column %ld - Query I=exclamation mark?\n",
  1546 		  linecnt,strstr(aline," I\"")-aline);
  1547             else
  1548                 cnt_punct++;
  1549 	}
  1550         /*
  1551 	 * Check for period without a capital letter. Cut-down from gutspell.
  1552          * Only works when it happens on a single line.
  1553 	 */
  1554         if (pswit[PARANOID_SWITCH])
  1555 	{
  1556             for (t=s=aline;strstr(t,". ");)
  1557 	    {
  1558                 t=strstr(t,". ");
  1559                 if (t==s)
  1560 		{
  1561                     t++;
  1562 		    /* start of line punctuation is handled elsewhere */
  1563                     continue;
  1564 		}
  1565                 if (!gcisalpha(t[-1]))
  1566 		{
  1567                     t++;
  1568                     continue;
  1569 		}
  1570                 if (isDutch)
  1571 		{
  1572 		    /* For Frank & Jeroen -- 's Middags case */
  1573                     if (t[2]==CHAR_SQUOTE && t[3]>='a' && t[3]<='z' &&
  1574 		      t[4]==CHAR_SPACE && t[5]>='A' && t[5]<='Z')
  1575 		    {
  1576                         t++;
  1577                         continue;
  1578 		    }
  1579 		}
  1580                 s1=t+2;
  1581                 while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))
  1582                     s1++;
  1583                 if (*s1>='a' && *s1<='z')
  1584 		{
  1585 		    /* we have something to investigate */
  1586                     istypo=1;
  1587 		    /* so let's go back and find out */
  1588                     for (s1=t-1;s1>=s &&
  1589 		      (gcisalpha(*s1) || gcisdigit(*s1) || *s1==CHAR_SQUOTE &&
  1590 		      gcisalpha(s1[1]) && gcisalpha(s1[-1]));s1--)
  1591 			;
  1592                     s1++;
  1593                     for (i=0;*s1 && *s1!='.';s1++,i++)
  1594                         testword[i]=*s1;
  1595                     testword[i]=0;
  1596                     for (i=0;*abbrev[i];i++)
  1597                         if (!strcmp(testword,abbrev[i]))
  1598                             istypo=0;
  1599                     if (gcisdigit(*testword))
  1600 			istypo=0;
  1601                     if (!testword[1])
  1602 			istypo=0;
  1603                     if (isroman(testword))
  1604 			istypo=0;
  1605                     if (istypo)
  1606 		    {
  1607                         istypo=0;
  1608                         for (i=0;testword[i];i++)
  1609                             if (strchr(vowels,testword[i]))
  1610                                 istypo=1;
  1611 		    }
  1612                     if (istypo)
  1613 		    {
  1614                         isdup=0;
  1615                         if (strlen(testword)<MAX_QWORD_LENGTH &&
  1616 			  !pswit[VERBOSE_SWITCH])
  1617                             for (i=0;i<qperiod_index;i++)
  1618                                 if (!strcmp(testword,qperiod[i]))
  1619                                     isdup=1;
  1620                         if (!isdup)
  1621 			{
  1622                             if (qperiod_index<MAX_QWORD &&
  1623 			      strlen(testword)<MAX_QWORD_LENGTH)
  1624 			    {
  1625                                 strcpy(qperiod[qperiod_index],testword);
  1626                                 qperiod_index++;
  1627 			    }
  1628                             if (pswit[ECHO_SWITCH])
  1629 				printf("\n%s\n",aline);
  1630                             if (!pswit[OVERVIEW_SWITCH])
  1631                                 printf("    Line %ld column %d - "
  1632 				  "Extra period?\n",linecnt,(int)(t-aline)+1);
  1633                             else
  1634                                 cnt_punct++;
  1635 			}
  1636 		    }
  1637 		}
  1638 	    t++;
  1639 	    }
  1640 	}
  1641         if (pswit[TYPO_SWITCH])
  1642 	{
  1643             /* Check for words usually not followed by punctuation. */
  1644             for (s=aline;*s;)
  1645 	    {
  1646                 wordstart=s;
  1647                 s=getaword(s,inword);
  1648                 if (!*inword)
  1649 		    continue;
  1650                 lowerit(inword);
  1651                 for (i=0;*nocomma[i];i++)
  1652                     if (!strcmp(inword,nocomma[i]))
  1653 		    {
  1654                         if (*s==',' || *s==';' || *s==':')
  1655 			{
  1656                             if (pswit[ECHO_SWITCH])
  1657 				printf("\n%s\n",aline);
  1658                             if (!pswit[OVERVIEW_SWITCH])
  1659                                 printf("    Line %ld column %d - "
  1660 				  "Query punctuation after %s?\n",
  1661 				  linecnt,(int)(s-aline)+1,inword);
  1662                             else
  1663                                 cnt_punct++;
  1664 			}
  1665 		    }
  1666 		for (i=0;*noperiod[i];i++)
  1667                     if (!strcmp(inword,noperiod[i]))
  1668 		    {
  1669                         if (*s=='.' || *s=='!')
  1670 			{
  1671                             if (pswit[ECHO_SWITCH])
  1672 				printf("\n%s\n",aline);
  1673                             if (!pswit[OVERVIEW_SWITCH])
  1674                                 printf("    Line %ld column %d - "
  1675 				  "Query punctuation after %s?\n",
  1676 				  linecnt,(int)(s-aline)+1,inword);
  1677                             else
  1678                                 cnt_punct++;
  1679 			}
  1680 		    }
  1681 	    }
  1682 	}
  1683         /*
  1684 	 * Check for commonly mistyped words,
  1685 	 * and digits like 0 for O in a word.
  1686 	 */
  1687         for (s=aline;*s;)
  1688 	{
  1689             wordstart=s;
  1690             s=getaword(s,inword);
  1691             if (!*inword)
  1692 		continue; /* don't bother with empty lines */
  1693             if (mixdigit(inword))
  1694 	    {
  1695                 if (pswit[ECHO_SWITCH])
  1696 		    printf("\n%s\n",aline);
  1697                 if (!pswit[OVERVIEW_SWITCH])
  1698                     printf("    Line %ld column %ld - Query digit in %s\n",
  1699 		      linecnt,(int)(wordstart-aline)+1,inword);
  1700                 else
  1701                     cnt_word++;
  1702 	    }
  1703             /*
  1704 	     * Put the word through a series of tests for likely typos and OCR
  1705 	     * errors.
  1706 	     */
  1707             if (pswit[TYPO_SWITCH])
  1708 	    {
  1709                 istypo=0;
  1710                 strcpy(testword,inword);
  1711                 alower=0;
  1712                 for (i=0;i<(signed int)strlen(testword);i++)
  1713 		{
  1714 		    /* lowercase for testing */
  1715                     if (testword[i]>='a' && testword[i]<='z')
  1716 			alower=1;
  1717                     if (alower && testword[i]>='A' && testword[i]<='Z')
  1718 		    {
  1719                         /*
  1720 			 * We have an uppercase mid-word. However, there are
  1721 			 * common cases:
  1722                          *   Mac and Mc like McGill
  1723                          *   French contractions like l'Abbe
  1724 			 */
  1725                         if (i==2 && testword[0]=='m' && testword[1]=='c' ||
  1726                           i==3 && testword[0]=='m' && testword[1]=='a' &&
  1727 			  testword[2]=='c' || i>0 && testword[i-1]==CHAR_SQUOTE)
  1728 			    ; /* do nothing! */
  1729                         else
  1730                             istypo=1;
  1731 		    }
  1732                     testword[i]=(char)tolower(testword[i]);
  1733 		}
  1734                 /*
  1735 		 * Check for certain unlikely two-letter combinations at word
  1736 		 * start and end.
  1737 		 */
  1738                 if (strlen(testword)>1)
  1739 		{
  1740                     for (i=0;*nostart[i];i++)
  1741                         if (!strncmp(testword,nostart[i],2))
  1742                             istypo=1;
  1743                     for (i=0;*noend[i];i++)
  1744                         if (!strncmp(testword+strlen(testword)-2,noend[i],2))
  1745                             istypo=1;
  1746 		}
  1747                 /* ght is common, gbt never. Like that. */
  1748                 if (strstr(testword,"cb"))
  1749 		    istypo=1;
  1750                 if (strstr(testword,"gbt"))
  1751 		    istypo=1;
  1752                 if (strstr(testword,"pbt"))
  1753 		    istypo=1;
  1754                 if (strstr(testword,"tbs"))
  1755 		    istypo=1;
  1756                 if (strstr(testword,"mrn"))
  1757 		    istypo=1;
  1758                 if (strstr(testword,"ahle"))
  1759 		    istypo=1;
  1760                 if (strstr(testword,"ihle"))
  1761 		    istypo=1;
  1762                 /*
  1763 		 * "TBE" does happen - like HEARTBEAT - but uncommon.
  1764                  * Also "TBI" - frostbite, outbid - but uncommon.
  1765                  * Similarly "ii" like Hawaii, or Pompeii, and in Roman
  1766 		 * numerals, but "ii" is a common scanno.
  1767 		 */
  1768                 if (strstr(testword,"tbi"))
  1769 		    istypo=1;
  1770                 if (strstr(testword,"tbe"))
  1771 		    istypo=1;
  1772                 if (strstr(testword,"ii"))
  1773 		    istypo=1;
  1774                 /*
  1775 		 * Check for no vowels or no consonants.
  1776                  * If none, flag a typo.
  1777 		 */
  1778                 if (!istypo && strlen(testword)>1)
  1779 		{
  1780                     vowel=consonant=0;
  1781                     for (i=0;testword[i];i++)
  1782 		    {
  1783                         if (testword[i]=='y' || gcisdigit(testword[i]))
  1784 			{
  1785 			    /* Yah, this is loose. */
  1786                             vowel++;
  1787                             consonant++;
  1788 			}
  1789                         else if (strchr(vowels,testword[i]))
  1790 			    vowel++;
  1791 			else
  1792 			    consonant++;
  1793 		    }
  1794                     if (!vowel || !consonant)
  1795                         istypo=1;
  1796 		}
  1797                 /*
  1798 		 * Now exclude the word from being reported if it's in
  1799                  * the okword list.
  1800 		 */
  1801                 for (i=0;*okword[i];i++)
  1802                     if (!strcmp(testword,okword[i]))
  1803                         istypo=0;
  1804                 /*
  1805 		 * What looks like a typo may be a Roman numeral.
  1806 		 * Exclude these.
  1807 		 */
  1808                 if (istypo && isroman(testword))
  1809 		    istypo=0;
  1810                 /* Check the manual list of typos. */
  1811                 if (!istypo)
  1812                     for (i=0;*typo[i];i++)
  1813                         if (!strcmp(testword,typo[i]))
  1814                             istypo=1;
  1815                 /*
  1816 		 * Check lowercase s, l, i and m - special cases.
  1817                  *   "j" - often a semi-colon gone wrong.
  1818                  *   "d" for a missing apostrophe - he d
  1819                  *   "n" for "in"
  1820 		 */
  1821                 if (!istypo && strlen(testword)==1 && strchr("slmijdn",*inword))
  1822 		    istypo=1;
  1823                 if (istypo)
  1824 		{
  1825                     isdup=0;
  1826                     if (strlen(testword)<MAX_QWORD_LENGTH &&
  1827 		      !pswit[VERBOSE_SWITCH])
  1828                         for (i=0;i<qword_index;i++)
  1829                             if (!strcmp(testword,qword[i]))
  1830 			    {
  1831                                 isdup=1;
  1832                                 ++dupcnt[i];
  1833 			    }
  1834                     if (!isdup)
  1835 		    {
  1836                         if (qword_index<MAX_QWORD &&
  1837 			  strlen(testword)<MAX_QWORD_LENGTH)
  1838 			{
  1839                             strcpy(qword[qword_index],testword);
  1840                             qword_index++;
  1841 			}
  1842                         if (pswit[ECHO_SWITCH])
  1843 			    printf("\n%s\n",aline);
  1844                         if (!pswit[OVERVIEW_SWITCH])
  1845 			{
  1846                             printf("    Line %ld column %d - Query word %s",
  1847 			      linecnt,(int)(wordstart-aline)+1,inword);
  1848                             if (strlen(testword)<MAX_QWORD_LENGTH &&
  1849 			      !pswit[VERBOSE_SWITCH])
  1850                                 printf(" - not reporting duplicates");
  1851                             printf("\n");
  1852 			}
  1853                         else
  1854                             cnt_word++;
  1855 		    }
  1856 		}
  1857 	    }
  1858 	    /* check the user's list of typos */
  1859 	    if (!istypo && usertypo_count)
  1860 		for (i=0;i<usertypo_count;i++)
  1861 		    if (!strcmp(testword,usertypo[i]))
  1862 		    {
  1863 			if (pswit[ECHO_SWITCH])
  1864 			    printf("\n%s\n",aline);
  1865 			if (!pswit[OVERVIEW_SWITCH])  
  1866 			    printf("    Line %ld column %d - "
  1867 			      "Query possible scanno %s\n",
  1868 			      linecnt,(int)(wordstart-aline)+2,inword);
  1869 		    }
  1870             if (pswit[PARANOID_SWITCH] && warn_digit)
  1871 	    {
  1872 		/* In paranoid mode, query all 0 and 1 standing alone. */
  1873                 if (!strcmp(inword,"0") || !strcmp(inword,"1"))
  1874 		{
  1875                     if (pswit[ECHO_SWITCH])
  1876 			printf("\n%s\n",aline);
  1877                     if (!pswit[OVERVIEW_SWITCH])
  1878                         printf("    Line %ld column %d - Query standalone %s\n",
  1879 			  linecnt,(int)(wordstart-aline)+2,inword);
  1880                     else
  1881                         cnt_word++;
  1882 		}
  1883 	    }
  1884 	}
  1885 	/*
  1886          * Look for added or missing spaces around punctuation and quotes.
  1887          * If there is a punctuation character like ! with no space on
  1888          * either side, suspect a missing!space. If there are spaces on
  1889          * both sides , assume a typo. If we see a double quote with no
  1890          * space or punctuation on either side of it, assume unspaced
  1891          * quotes "like"this.
  1892 	 */
  1893         llen=strlen(aline);
  1894         for (i=1;i<llen;i++)
  1895 	{
  1896 	    /* For each character in the line after the first. */
  1897             if (strchr(".?!,;:_",aline[i]))  /* if it's punctuation */
  1898 	    {
  1899 		/* we need to suppress warnings for acronyms like M.D. */
  1900                 isacro=0;
  1901 		/* we need to suppress warnings for ellipsis . . . */
  1902                 isellipsis=0;
  1903 		/* if there are letters on both sides of it or ... */
  1904                 if (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1]) ||
  1905                    gcisalpha(aline[i+1]) && strchr("?!,;:",aline[i]))
  1906 		{
  1907 		    /* ...if it's strict punctuation followed by an alpha */
  1908                     if (aline[i]=='.')
  1909 		    {
  1910                         if (i>2 && aline[i-2]=='.')
  1911 			    isacro=1;
  1912                         if (i+2<llen && aline[i+2]=='.')
  1913 			    isacro=1;
  1914 		    }
  1915                     if (!isacro)
  1916 		    {
  1917                         if (pswit[ECHO_SWITCH])
  1918 			    printf("\n%s\n",aline);
  1919                         if (!pswit[OVERVIEW_SWITCH])
  1920                             printf("    Line %ld column %d - Missing space?\n",
  1921 			      linecnt,i+1);
  1922                         else
  1923                             cnt_punct++;
  1924 		    }
  1925 		}
  1926                 if (aline[i-1]==CHAR_SPACE &&
  1927 		  (aline[i+1]==CHAR_SPACE || aline[i+1]==0))
  1928 		{
  1929 		    /*
  1930 		     * If there are spaces on both sides,
  1931 		     * or space before and end of line.
  1932 		     */
  1933                     if (aline[i]=='.')
  1934 		    {
  1935                         if (i>2 && aline[i-2]=='.')
  1936 			    isellipsis=1;
  1937                         if (i+2<llen && aline[i+2]=='.')
  1938 			    isellipsis=1;
  1939 		    }
  1940                     if (!isemptyline && !isellipsis)
  1941 		    {
  1942                         if (pswit[ECHO_SWITCH])
  1943 			    printf("\n%s\n",aline);
  1944                         if (!pswit[OVERVIEW_SWITCH])
  1945                             printf("    Line %ld column %d - "
  1946 			      "Spaced punctuation?\n",linecnt,i+1);
  1947                         else
  1948                             cnt_punct++;
  1949 		    }
  1950 		}
  1951 	    }
  1952 	}
  1953         /* Split out the characters that CANNOT be preceded by space. */
  1954         llen=strlen(aline);
  1955         for (i=1;i<llen;i++)
  1956 	{
  1957 	    /* for each character in the line after the first */
  1958             if (strchr("?!,;:",aline[i]))
  1959 	    {
  1960 		/* if it's punctuation that _cannot_ have a space before it */
  1961                 if (aline[i-1]==CHAR_SPACE && !isemptyline &&
  1962 		  aline[i+1]!=CHAR_SPACE)
  1963 		{
  1964 		    /*
  1965 		     * If aline[i+1] DOES == space,
  1966 		     * it was already reported just above.
  1967 		     */
  1968                     if (pswit[ECHO_SWITCH])
  1969 			printf("\n%s\n",aline);
  1970                     if (!pswit[OVERVIEW_SWITCH])
  1971                         printf("    Line %ld column %d - Spaced punctuation?\n",
  1972 			  linecnt,i+1);
  1973                     else
  1974                         cnt_punct++;
  1975 		}
  1976 	    }
  1977 	}
  1978         /*
  1979 	 * Special case " .X" where X is any alpha.
  1980          * This plugs a hole in the acronym code above.
  1981 	 * Inelegant, but maintainable.
  1982 	 */
  1983         llen=strlen(aline);
  1984         for (i=1;i<llen;i++)
  1985 	{
  1986 	    /* for each character in the line after the first */
  1987             if (aline[i]=='.')
  1988 	    {
  1989 		/* if it's a period */
  1990                 if (aline[i-1]==CHAR_SPACE && gcisalpha(aline[i+1]))
  1991 		{
  1992 		    /*
  1993 		     * If the period follows a space and
  1994 		     * is followed by a letter.
  1995 		     */
  1996                     if (pswit[ECHO_SWITCH])
  1997 			printf("\n%s\n",aline);
  1998                     if (!pswit[OVERVIEW_SWITCH])
  1999                         printf("    Line %ld column %d - Spaced punctuation?\n",
  2000 			  linecnt,i+1);
  2001                     else
  2002                         cnt_punct++;
  2003 		}
  2004 	    }
  2005 	}
  2006         for (i=1;i<llen;i++)
  2007 	{
  2008 	    /* for each character in the line after the first */
  2009             if (aline[i]==CHAR_DQUOTE)
  2010 	    {
  2011                 if (!strchr(" _-.'`,;:!/([{?}])",aline[i-1]) &&
  2012 		  !strchr(" _-.'`,;:!/([{?}])",aline[i+1]) && aline[i+1] ||
  2013 		  !strchr(" _-([{'`",aline[i-1]) && gcisalpha(aline[i+1]))
  2014 		{
  2015 		    if (pswit[ECHO_SWITCH])
  2016 			printf("\n%s\n",aline);
  2017 		    if (!pswit[OVERVIEW_SWITCH])
  2018 			printf("    Line %ld column %d - Unspaced quotes?\n",
  2019 			  linecnt,i+1);
  2020 		    else
  2021 			cnt_punct++;
  2022 		}
  2023 	    }
  2024 	}
  2025         /* Check parity of quotes. */
  2026         for (s=aline;*s;s++)
  2027 	{
  2028             if (*s==CHAR_DQUOTE)
  2029 	    {
  2030                 if (!(dquotepar=!dquotepar))
  2031 		{
  2032 		    /* parity even */
  2033                     if (!strchr("_-.'`/,;:!?)]} ",s[1]))
  2034 		    {
  2035                         if (pswit[ECHO_SWITCH])
  2036 			    printf("\n%s\n",aline);
  2037                         if (!pswit[OVERVIEW_SWITCH])
  2038                             printf("    Line %ld column %d - "
  2039 			      "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
  2040                         else
  2041                             cnt_punct++;
  2042 		    }
  2043 		}
  2044                 else
  2045 		{
  2046 		    /* parity odd */
  2047                     if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
  2048 		      !strchr("_-/.'`([{$",s[1]) || !s[1])
  2049 		    {
  2050                         if (pswit[ECHO_SWITCH])
  2051 			    printf("\n%s\n",aline);
  2052                         if (!pswit[OVERVIEW_SWITCH])
  2053                             printf("    Line %ld column %d - "
  2054 			      "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
  2055                         else
  2056                             cnt_punct++;
  2057 		    }
  2058 		}
  2059 	    }
  2060 	}
  2061 	if (*aline==CHAR_DQUOTE)
  2062 	{
  2063 	    if (strchr(",;:!?)]} ",aline[1]))
  2064 	    {
  2065 		if (pswit[ECHO_SWITCH])
  2066 		    printf("\n%s\n",aline);
  2067 		if (!pswit[OVERVIEW_SWITCH])
  2068 		    printf("    Line %ld column 1 - Wrongspaced quotes?\n",
  2069 		      linecnt,(int)(s-aline)+1);
  2070 		else
  2071 		    cnt_punct++;
  2072 	    }
  2073 	}
  2074         if (pswit[SQUOTE_SWITCH])
  2075 	{
  2076             for (s=aline;*s;s++)
  2077 	    {
  2078                 if ((*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE) &&
  2079 		  (s==aline || s>aline && !gcisalpha(s[-1]) ||
  2080 		  !gcisalpha(s[1])))
  2081 		{
  2082                     if (!(squotepar=!squotepar))
  2083 		    {
  2084 			/* parity even */
  2085                         if (!strchr("_-.'`/\",;:!?)]} ",s[1]))
  2086 			{
  2087                             if (pswit[ECHO_SWITCH])
  2088 				printf("\n%s\n",aline);
  2089                             if (!pswit[OVERVIEW_SWITCH])
  2090                                 printf("    Line %ld column %d - "
  2091 				  "Wrongspaced singlequotes?\n",
  2092 				  linecnt,(int)(s-aline)+1);
  2093                             else
  2094                                 cnt_punct++;
  2095 			}
  2096 		    }
  2097                     else
  2098 		    {
  2099 			/* parity odd */
  2100                         if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
  2101 			  !strchr("_-/\".'`",s[1]) || !s[1])
  2102 			{
  2103                             if (pswit[ECHO_SWITCH])
  2104 				printf("\n%s\n",aline);
  2105                             if (!pswit[OVERVIEW_SWITCH])
  2106                                 printf("    Line %ld column %d - "
  2107 				  "Wrongspaced singlequotes?\n",
  2108 				  linecnt,(int)(s-aline)+1);
  2109                             else
  2110                                 cnt_punct++;
  2111 			}
  2112 		    }
  2113 		}
  2114 	    }
  2115 	}
  2116         /*
  2117 	 * Look for double punctuation like ,. or ,,
  2118          * Thanks to DW for the suggestion!
  2119          * In books with references, ".," and ".;" are common
  2120          * e.g. "etc., etc.," and vol. 1.; vol 3.;
  2121          * OTOH, from my initial tests, there are also fairly
  2122          * common errors. What to do? Make these cases paranoid?
  2123          * ".," is the most common, so warn_dotcomma is used
  2124          * to suppress detailed reporting if it occurs often.
  2125 	 */
  2126         llen=strlen(aline);
  2127         for (i=0;i<llen;i++)
  2128 	{
  2129 	    /* for each punctuation character in the line */
  2130             if (strchr(".?!,;:",aline[i]) && (strchr(".?!,;:",aline[i+1])) &&
  2131 	      aline[i] && aline[i+1])
  2132 	    {
  2133 		/* followed by punctuation, it's a query, unless . . . */
  2134                 if (aline[i]==aline[i+1] && (aline[i]=='.' || aline[i]=='?' ||
  2135 		  aline[i]=='!') ||
  2136 		  !warn_dotcomma && aline[i]=='.' && aline[i+1]==',' ||
  2137 		  isFrench && !strncmp(aline+i,",...",4) ||
  2138 		  isFrench && !strncmp(aline+i,"...,",4) ||
  2139 		  isFrench && !strncmp(aline+i,";...",4) ||
  2140 		  isFrench && !strncmp(aline+i,"...;",4) ||
  2141 		  isFrench && !strncmp(aline+i,":...",4) ||
  2142 		  isFrench && !strncmp(aline+i,"...:",4) ||
  2143 		  isFrench && !strncmp(aline+i,"!...",4) ||
  2144 		  isFrench && !strncmp(aline+i,"...!",4) ||
  2145 		  isFrench && !strncmp(aline+i,"?...",4) ||
  2146 		  isFrench && !strncmp(aline+i,"...?",4))
  2147 		{
  2148 		    if (isFrench && !strncmp(aline+i,",...",4) ||
  2149 		      isFrench && !strncmp(aline+i,"...,",4) ||
  2150 		      isFrench && !strncmp(aline+i,";...",4) ||
  2151 		      isFrench && !strncmp(aline+i,"...;",4) ||
  2152 		      isFrench && !strncmp(aline+i,":...",4) ||
  2153 		      isFrench && !strncmp(aline+i,"...:",4) ||
  2154 		      isFrench && !strncmp(aline+i,"!...",4) ||
  2155 		      isFrench && !strncmp(aline+i,"...!",4) ||
  2156 		      isFrench && !strncmp(aline+i,"?...",4) ||
  2157 		      isFrench && !strncmp(aline+i,"...?",4))
  2158 			i+=4;
  2159 		    ; /* do nothing for .. !! and ?? which can be legit */
  2160 		}
  2161                 else
  2162 		{
  2163                     if (pswit[ECHO_SWITCH])
  2164 			printf("\n%s\n",aline);
  2165                     if (!pswit[OVERVIEW_SWITCH])
  2166                         printf("    Line %ld column %d - Double punctuation?\n",
  2167 			  linecnt,i+1);
  2168                     else
  2169                         cnt_punct++;
  2170 		}
  2171 	    }
  2172 	}
  2173         s=aline;
  2174         while (strstr(s," \" "))
  2175 	{
  2176             if (pswit[ECHO_SWITCH])
  2177 		printf("\n%s\n",aline);
  2178             if (!pswit[OVERVIEW_SWITCH])
  2179                 printf("    Line %ld column %d - Spaced doublequote?\n",
  2180 		  linecnt,(int)(strstr(s," \" ")-aline+1));
  2181             else
  2182                 cnt_punct++;
  2183             s=strstr(s," \" ")+2;
  2184 	}
  2185         s=aline;
  2186         while (strstr(s," ' "))
  2187 	{
  2188             if (pswit[ECHO_SWITCH])
  2189 		printf("\n%s\n",aline);
  2190             if (!pswit[OVERVIEW_SWITCH])
  2191                 printf("    Line %ld column %d - Spaced singlequote?\n",
  2192 		  linecnt,(int)(strstr(s," ' ")-aline+1));
  2193             else
  2194                 cnt_punct++;
  2195             s=strstr(s," ' ")+2;
  2196 	}
  2197         s=aline;
  2198         while (strstr(s," ` "))
  2199 	{
  2200             if (pswit[ECHO_SWITCH])
  2201 		printf("\n%s\n",aline);
  2202             if (!pswit[OVERVIEW_SWITCH])
  2203                 printf("    Line %ld column %d - Spaced singlequote?\n",
  2204 		  linecnt,(int)(strstr(s," ` ")-aline+1));
  2205             else
  2206                 cnt_punct++;
  2207             s=strstr(s," ` ")+2;
  2208 	}
  2209         /* check special case of 'S instead of 's at end of word */
  2210         s=aline+1;
  2211         while (*s)
  2212 	{
  2213             if (*s==CHAR_SQUOTE && s[1]=='S' && s[-1]>='a' && s[-1]<='z')
  2214 	    {
  2215                 if (pswit[ECHO_SWITCH])
  2216 		    printf("\n%s\n",aline);
  2217                 if (!pswit[OVERVIEW_SWITCH])
  2218                     printf("    Line %ld column %d - Capital \"S\"?\n",
  2219 		      linecnt,(int)(s-aline+2));
  2220                 else
  2221                     cnt_punct++;
  2222 	    }
  2223             s++;
  2224 	}
  2225         /*
  2226 	 * Now check special cases - start and end of line -
  2227          * for single and double quotes. Start is sometimes [sic]
  2228          * but better to query it anyway.
  2229          * While we're here, check for dash at end of line.
  2230 	 */
  2231         llen=strlen(aline);
  2232         if (llen>1)
  2233 	{
  2234             if (aline[llen-1]==CHAR_DQUOTE || aline[llen-1]==CHAR_SQUOTE ||
  2235 	      aline[llen-1]==CHAR_OPEN_SQUOTE)
  2236                 if (aline[llen-2]==CHAR_SPACE)
  2237 		{
  2238                     if (pswit[ECHO_SWITCH])
  2239 			printf("\n%s\n",aline);
  2240                     if (!pswit[OVERVIEW_SWITCH])
  2241                         printf("    Line %ld column %d - Spaced quote?\n",
  2242 			  linecnt,llen);
  2243                     else
  2244                         cnt_punct++;
  2245 		}
  2246             if ((aline[0]==CHAR_SQUOTE || aline[0]==CHAR_OPEN_SQUOTE) &&
  2247 	      aline[1]==CHAR_SPACE)
  2248 	    {
  2249 		if (pswit[ECHO_SWITCH])
  2250 		    printf("\n%s\n",aline);
  2251 		if (!pswit[OVERVIEW_SWITCH])
  2252 		    printf("    Line %ld column 1 - Spaced quote?\n",linecnt);
  2253 		else
  2254 		    cnt_punct++;
  2255 	    }
  2256             /*
  2257 	     * Dash at end of line may well be legit - paranoid mode only
  2258              * and don't report em-dash at line-end.
  2259 	     */
  2260             if (pswit[PARANOID_SWITCH] && warn_hyphen)
  2261 	    {
  2262                 for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
  2263 		    ;
  2264                 if (aline[i]=='-' && aline[i-1]!='-')
  2265 		{
  2266                     if (pswit[ECHO_SWITCH])
  2267 			printf("\n%s\n",aline);
  2268                     if (!pswit[OVERVIEW_SWITCH])
  2269                         printf("    Line %ld column %d - "
  2270 			  "Hyphen at end of line?\n",linecnt,i);
  2271 		}
  2272 	    }
  2273 	}
  2274         /*
  2275 	 * Brackets are often unspaced, but shouldn't be surrounded by alpha.
  2276          * If so, suspect a scanno like "a]most".
  2277 	 */
  2278         llen=strlen(aline);
  2279         for (i=1;i<llen-1;i++)
  2280 	{
  2281 	    /* for each bracket character in the line except 1st & last */
  2282             if (strchr("{[()]}",aline[i]) && gcisalpha(aline[i-1]) &&
  2283 	      gcisalpha(aline[i+1]))
  2284 	    {
  2285                 if (pswit[ECHO_SWITCH])
  2286 		    printf("\n%s\n",aline);
  2287                 if (!pswit[OVERVIEW_SWITCH])
  2288                     printf("    Line %ld column %d - Unspaced bracket?\n",
  2289 		      linecnt,i);
  2290                 else
  2291                     cnt_punct++;
  2292 	    }
  2293 	}
  2294         llen=strlen(aline);
  2295         if (warn_endquote)
  2296 	{
  2297             for (i=1;i<llen;i++)
  2298 	    {
  2299 		/* for each character in the line except 1st */
  2300                 if (aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
  2301 		{
  2302 		    if (pswit[ECHO_SWITCH])
  2303 			printf("\n%s\n",aline);
  2304 		    if (!pswit[OVERVIEW_SWITCH])
  2305 			printf("    Line %ld column %d - "
  2306 			  "endquote missing punctuation?\n",linecnt,i);
  2307 		    else
  2308 			cnt_punct++;
  2309 		}
  2310 	    }
  2311 	}
  2312 	/*
  2313          * Check for <HTML TAG>.
  2314          * If there is a < in the line, followed at some point
  2315          * by a > then we suspect HTML.
  2316 	 */
  2317         if (strstr(aline,"<") && strstr(aline,">"))
  2318 	{
  2319             i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);
  2320             if (i>0)
  2321 	    {
  2322                 strncpy(wrk,strstr(aline,"<"),i);
  2323                 wrk[i]=0;
  2324                 if (pswit[ECHO_SWITCH])
  2325 		    printf("\n%s\n",aline);
  2326                 if (!pswit[OVERVIEW_SWITCH])
  2327                     printf("    Line %ld column %d - HTML Tag? %s \n",
  2328 		      linecnt,(int)(strstr(aline,"<")-aline)+1,wrk);
  2329                 else
  2330                     cnt_html++;
  2331 	    }
  2332 	}
  2333         /*
  2334 	 * Check for &symbol; HTML.
  2335          * If there is a & in the line, followed at
  2336          * some point by a ; then we suspect HTML.
  2337 	 */
  2338         if (strstr(aline,"&") && strstr(aline,";"))
  2339 	{
  2340             i=(int)(strstr(aline,";")-strstr(aline,"&")+1);
  2341             for (s=strstr(aline,"&");s<strstr(aline,";");s++)   
  2342                 if (*s==CHAR_SPACE)
  2343 		    i=0;                /* Don't report "Jones & Son;" */
  2344             if (i>0)
  2345 	    {
  2346                 strncpy(wrk,strstr(aline,"&"),i);
  2347                 wrk[i]=0;
  2348                 if (pswit[ECHO_SWITCH])
  2349 		    printf("\n%s\n",aline);
  2350                 if (!pswit[OVERVIEW_SWITCH])
  2351                     printf("    Line %ld column %d - HTML symbol? %s \n",
  2352 		      linecnt,(int)(strstr(aline,"&")-aline)+1,wrk);
  2353                 else
  2354                     cnt_html++;
  2355 	    }
  2356 	}
  2357         /*
  2358 	 * At end of paragraph, check for mismatched quotes.
  2359          * We don't want to report an error immediately, since it is a
  2360          * common convention to omit the quotes at end of paragraph if
  2361          * the next paragraph is a continuation of the same speaker.
  2362          * Where this is the case, the next para should begin with a
  2363          * quote, so we store the warning message and only display it
  2364          * at the top of the next iteration if the new para doesn't
  2365          * start with a quote.
  2366          * The -p switch overrides this default, and warns of unclosed
  2367          * quotes on _every_ paragraph, whether the next begins with a
  2368          * quote or not.
  2369 	 */
  2370         if (isemptyline)
  2371 	{
  2372 	    /* end of para - add up the totals */
  2373             if (quot%2)
  2374                 sprintf(dquote_err,"    Line %ld - Mismatched quotes\n",
  2375 		  linecnt);
  2376             if (pswit[SQUOTE_SWITCH] && open_single_quote &&
  2377 	      open_single_quote!=close_single_quote)
  2378                 sprintf(squote_err,"    Line %ld - Mismatched singlequotes?\n",
  2379 		  linecnt);
  2380             if (pswit[SQUOTE_SWITCH] && open_single_quote &&
  2381 	      open_single_quote!=close_single_quote &&
  2382 	      open_single_quote!=close_single_quote+1)
  2383 		/*
  2384 		 * Flag it to be noted regardless of the
  2385 		 * first char of the next para.
  2386 		 */
  2387                 squot=1;
  2388             if (r_brack)
  2389                 sprintf(rbrack_err,"    Line %ld - "
  2390 		  "Mismatched round brackets?\n",linecnt);
  2391             if (s_brack)
  2392                 sprintf(sbrack_err,"    Line %ld - "
  2393 		  "Mismatched square brackets?\n",linecnt);
  2394             if (c_brack)
  2395                 sprintf(cbrack_err,"    Line %ld - "
  2396 		  "Mismatched curly brackets?\n",linecnt);
  2397             if (c_unders%2)
  2398                 sprintf(unders_err,"    Line %ld - Mismatched underscores?\n",
  2399 		  linecnt);
  2400             quot=s_brack=c_brack=r_brack=c_unders=open_single_quote=
  2401 	      close_single_quote=0;
  2402 	    /* let the next iteration know that it's starting a new para */
  2403             isnewpara=1;
  2404 	}
  2405         /*
  2406 	 * Check for omitted punctuation at end of paragraph by working back
  2407 	 * through prevline. DW.
  2408          * Need to check this only for "normal" paras.
  2409          * So what is a "normal" para?
  2410          *    Not normal if one-liner (chapter headings, etc.)
  2411          *    Not normal if doesn't contain at least one locase letter
  2412          *    Not normal if starts with space
  2413 	 */
  2414         if (isemptyline)
  2415 	{
  2416 	    /* end of para */
  2417             for (s=prevline,i=0;*s && !i;s++)
  2418                 if (gcisletter(*s))
  2419 		    /* use i to indicate the presence of a letter on the line */
  2420                     i=1;
  2421             /*
  2422 	     * This next "if" is a problem.
  2423              * If we say "start_para_line <= linecnt - 1", that includes
  2424 	     * one-line "paragraphs" like chapter heads. Lotsa false positives.
  2425              * If we say "start_para_line < linecnt - 1" it doesn't, but then it
  2426              * misses genuine one-line paragraphs.
  2427 	     */
  2428             if (i && lastblen>2 && start_para_line<linecnt-1 &&
  2429 	      *prevline>CHAR_SPACE)
  2430 	    {
  2431                 for (i=strlen(prevline)-1;
  2432 		  (prevline[i]==CHAR_DQUOTE || prevline[i]==CHAR_SQUOTE) &&
  2433 		  prevline[i]>CHAR_SPACE && i>0;
  2434 		  i--)
  2435 		    ;
  2436                 for (;i>0;i--)
  2437 		{
  2438                     if (gcisalpha(prevline[i]))
  2439 		    {
  2440                         if (pswit[ECHO_SWITCH])
  2441 			    printf("\n%s\n",prevline);
  2442                         if (!pswit[OVERVIEW_SWITCH])
  2443                             printf("    Line %ld column %d - "
  2444 			      "No punctuation at para end?\n",
  2445 			      linecnt-1,strlen(prevline));
  2446                         else
  2447                             cnt_punct++;
  2448                         break;
  2449 		    }
  2450                     if (strchr("-.:!([{?}])",prevline[i]))
  2451                         break;
  2452 		}
  2453 	    }
  2454 	}
  2455         strcpy(prevline,aline);
  2456     }
  2457     fclose(infile);
  2458     if (!pswit[OVERVIEW_SWITCH])
  2459         for (i=0;i<MAX_QWORD;i++)
  2460             if (dupcnt[i])
  2461                 printf("\nNote: Queried word %s was duplicated %d time%s\n",
  2462 		  qword[i],dupcnt[i],"s");
  2463 }
  2464 
  2465 /*
  2466  * flgets:
  2467  *
  2468  * Get one line from the input stream, checking for
  2469  * the existence of exactly one CR/LF line-end per line.
  2470  *
  2471  * Returns: a pointer to the line.
  2472  */
  2473 char *flgets(char *theline,int maxlen,FILE *thefile,long lcnt)
  2474 {
  2475     char c;
  2476     int len,isCR,cint;
  2477     *theline=0;
  2478     len=isCR=0;
  2479     c=cint=fgetc(thefile);
  2480     do
  2481     {
  2482         if (cint==EOF)
  2483             return NULL;
  2484 	/* either way, it's end of line */
  2485         if (c==10)
  2486 	{
  2487             if (isCR)
  2488                 break;
  2489             else
  2490 	    {
  2491 		/* Error - a LF without a preceding CR */
  2492                 if (pswit[LINE_END_SWITCH])
  2493 		{
  2494                     if (pswit[ECHO_SWITCH])
  2495 			printf("\n%s\n",theline);
  2496                     if (!pswit[OVERVIEW_SWITCH])
  2497                         printf("    Line %ld - No CR?\n",lcnt);
  2498                     else
  2499                         cnt_lineend++;
  2500 		}
  2501                 break;
  2502 	    }
  2503 	}
  2504         if (c==13)
  2505 	{
  2506             if (isCR)
  2507 	    {
  2508 		/* Error - two successive CRs */
  2509                 if (pswit[LINE_END_SWITCH])
  2510 		{
  2511                     if (pswit[ECHO_SWITCH])
  2512 			printf("\n%s\n",theline);
  2513                     if (!pswit[OVERVIEW_SWITCH])
  2514                         printf("    Line %ld - Two successive CRs?\n",lcnt);
  2515                     else
  2516                         cnt_lineend++;
  2517 		}
  2518 	    }
  2519             isCR=1;
  2520 	}
  2521         else
  2522 	{
  2523             if (pswit[LINE_END_SWITCH] && isCR)
  2524 	    {
  2525                 if (pswit[ECHO_SWITCH])
  2526 		    printf("\n%s\n",theline);
  2527                 if (!pswit[OVERVIEW_SWITCH])
  2528                     printf("    Line %ld column %d - CR without LF?\n",
  2529 		      lcnt,len+1);
  2530                 else
  2531                     cnt_lineend++;
  2532 	    }
  2533             theline[len]=c;
  2534             len++;
  2535             theline[len]=0;
  2536             isCR=0;
  2537 	}
  2538         c=cint=fgetc(thefile);
  2539     } while(len<maxlen);
  2540     if (pswit[MARKUP_SWITCH])  
  2541         postprocess_for_HTML(theline);
  2542     if (pswit[DP_SWITCH])  
  2543         postprocess_for_DP(theline);
  2544     return theline;
  2545 }
  2546 
  2547 /*
  2548  * mixdigit:
  2549  *
  2550  * Takes a "word" as a parameter, and checks whether it
  2551  * contains a mixture of alpha and digits. Generally, this is an
  2552  * error, but may not be for cases like 4th or L5 12s. 3d.
  2553  *
  2554  * Returns: 0 if no error found, 1 if error.
  2555  */
  2556 int mixdigit(char *checkword)
  2557 {
  2558     int wehaveadigit,wehavealetter,firstdigits,query,wl;
  2559     char *s;
  2560     wehaveadigit=wehavealetter=query=0;
  2561     for (s=checkword;*s;s++)
  2562         if (gcisalpha(*s))
  2563             wehavealetter=1;
  2564         else
  2565             if (gcisdigit(*s))
  2566                 wehaveadigit=1;
  2567     if (wehaveadigit && wehavealetter)
  2568     {
  2569 	/* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
  2570         query=1;
  2571         wl=strlen(checkword);
  2572         for (firstdigits=0;gcisdigit(checkword[firstdigits]);firstdigits++)
  2573             ;
  2574         /* digits, ending in st, rd, nd, th of either case */
  2575         if (firstdigits+2==wl && (matchword(checkword+wl-2,"st") ||
  2576 	  matchword(checkword+wl-2,"rd") || matchword(checkword+wl-2,"nd") ||
  2577 	  matchword(checkword+wl-2,"th")))
  2578 	    query=0;
  2579         if (firstdigits+3==wl && (matchword(checkword+wl-3,"sts") ||
  2580 	  matchword(checkword+wl-3,"rds") || matchword(checkword+wl-3,"nds") ||
  2581 	  matchword(checkword+wl-3,"ths")))
  2582 	    query=0;
  2583         if (firstdigits+3==wl && (matchword(checkword+wl-4,"stly") ||
  2584 	  matchword(checkword+wl-4,"rdly") ||
  2585 	  matchword(checkword+wl-4,"ndly") || matchword(checkword+wl-4,"thly")))
  2586 	    query=0;
  2587         /* digits, ending in l, L, s or d */
  2588         if (firstdigits+1==wl && (checkword[wl-1]=='l' ||
  2589 	  checkword[wl-1]=='L' || checkword[wl-1]=='s' || checkword[wl-1]=='d'))
  2590 	    query=0;
  2591         /*
  2592 	 * L at the start of a number, representing Britsh pounds, like L500.
  2593          * This is cute. We know the current word is mixeddigit. If the first
  2594          * letter is L, there must be at least one digit following. If both
  2595          * digits and letters follow, we have a genuine error, else we have a
  2596          * capital L followed by digits, and we accept that as a non-error.
  2597 	 */
  2598         if (checkword[0]=='L' && !mixdigit(checkword+1))
  2599 	    query=0;
  2600     }
  2601     return query;
  2602 }
  2603 
  2604 /*
  2605  * getaword:
  2606  *
  2607  * Extracts the first/next "word" from the line, and puts
  2608  * it into "thisword". A word is defined as one English word unit--or
  2609  * at least that's the aim.
  2610  *
  2611  * Returns: a pointer to the position in the line where we will start
  2612  *          looking for the next word.
  2613  */
  2614 char *getaword(char *fromline,char *thisword)
  2615 {
  2616     int i,wordlen;
  2617     char *s;
  2618     wordlen=0;
  2619     for (;!gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline;
  2620       fromline++)
  2621 	;
  2622     /*
  2623      * Use a look-ahead to handle exceptions for numbers like 1,000 and 1.35.
  2624      * Especially yucky is the case of L1,000
  2625      * This section looks for a pattern of characters including a digit
  2626      * followed by a comma or period followed by one or more digits.
  2627      * If found, it returns this whole pattern as a word; otherwise we discard
  2628      * the results and resume our normal programming.
  2629      */
  2630     s=fromline;
  2631     for (;(gcisdigit(*s) || gcisalpha(*s) || *s==',' || *s=='.') &&
  2632       wordlen<MAXWORDLEN;s++)
  2633     {
  2634 	thisword[wordlen]=*s;
  2635         wordlen++;
  2636     }
  2637     thisword[wordlen]=0;
  2638     for (i=1;i<wordlen-1;i++)
  2639     {
  2640         if (thisword[i]=='.' || thisword[i]==',')
  2641 	{
  2642             if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1]))
  2643 	    {
  2644                 fromline=s;
  2645                 return fromline;
  2646 	    }
  2647 	}
  2648     }
  2649     /* we didn't find a punctuated number - do the regular getword thing */
  2650     wordlen=0;
  2651     for (;(gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline=='\'') &&
  2652       wordlen<MAXWORDLEN;fromline++)
  2653     {
  2654         thisword[wordlen]=*fromline;
  2655         wordlen++;
  2656     }
  2657     thisword[wordlen]=0;
  2658     return fromline;
  2659 }
  2660 
  2661 /*
  2662  * matchword:
  2663  *
  2664  * A case-insensitive string matcher.
  2665  */
  2666 int matchword(char *checkfor,char *thisword)
  2667 {
  2668     unsigned int ismatch,i;
  2669     if (strlen(checkfor)!=strlen(thisword))
  2670 	return 0;
  2671     ismatch=1;     /* assume a match until we find a difference */
  2672     for (i=0;i<strlen(checkfor);i++)
  2673         if (toupper(checkfor[i])!=toupper(thisword[i]))
  2674             ismatch=0;
  2675     return ismatch;
  2676 }
  2677 
  2678 /*
  2679  * lowerit:
  2680  *
  2681  * Lowercase the line.
  2682  */
  2683 
  2684 void lowerit(char *theline)
  2685 {
  2686     for (;*theline;theline++)
  2687         if (*theline>='A' && *theline<='Z')
  2688             *theline+=32;
  2689 }
  2690 
  2691 /*
  2692  * isroman:
  2693  *
  2694  * Is this word a Roman Numeral?
  2695  *
  2696  * It doesn't actually validate that the number is a valid Roman Numeral--for
  2697  * example it will pass MXXXXXXXXXX as a valid Roman Numeral, but that's not
  2698  * what we're here to do. If it passes this, it LOOKS like a Roman numeral.
  2699  * Anyway, the actual Romans were pretty tolerant of bad arithmetic, or
  2700  * expressions thereof, except when it came to taxes. Allow any number of M,
  2701  * an optional D, an optional CM or CD, any number of optional Cs, an optional
  2702  * XL or an optional XC, an optional IX or IV, an optional V and any number
  2703  * of optional Is.
  2704  */
  2705 int isroman(char *t)
  2706 {
  2707     char *s;
  2708     if (!t || !*t)
  2709 	return 0;
  2710     s=t;
  2711     while (*t=='m' && *t)
  2712 	t++;
  2713     if (*t=='d')
  2714 	t++;
  2715     if (*t=='c' && t[1]=='m')
  2716 	t+=2;
  2717     if (*t=='c' && t[1]=='d')
  2718 	t+=2;
  2719     while (*t=='c' && *t)
  2720 	t++;
  2721     if (*t=='x' && t[1]=='l')
  2722 	t+=2;
  2723     if (*t=='x' && t[1]=='c')
  2724 	t+=2;
  2725     if (*t=='l')
  2726 	t++;
  2727     while (*t=='x' && *t)
  2728 	t++;
  2729     if (*t=='i' && t[1]=='x')
  2730 	t+=2;
  2731     if (*t=='i' && t[1]=='v')
  2732 	t+=2;
  2733     if (*t=='v')
  2734 	t++;
  2735     while (*t=='i' && *t)
  2736 	t++;
  2737     return !*t;
  2738 }
  2739 
  2740 /*
  2741  * gcisalpha:
  2742  *
  2743  * A version of isalpha() that is somewhat lenient on 8-bit texts.
  2744  * If we use the standard function, 8-bit accented characters break
  2745  * words, so that tete with accented characters appears to be two words, "t"
  2746  * and "t", with 8-bit characters between them. This causes over-reporting of
  2747  * errors. gcisalpha() recognizes accented letters from the CP1252 (Windows)
  2748  * and ISO-8859-1 character sets, which are the most common PG 8-bit types.
  2749  */
  2750 int gcisalpha(unsigned char c)
  2751 {
  2752     if (c>='a' && c<='z')
  2753 	return 1;
  2754     if (c>='A' && c<='Z')
  2755 	return 1;
  2756     if (c<140)
  2757 	return 0;
  2758     if (c>=192 && c!=208 && c!=215 && c!=222 && c!=240 && c!=247 && c!=254)
  2759 	return 1;
  2760     if (c==140 || c==142 || c==156 || c==158 || c==159)
  2761 	return 1;
  2762     return 0;
  2763 }
  2764 
  2765 /*
  2766  * gcisdigit:
  2767  *
  2768  * A version of isdigit() that doesn't get confused in 8-bit texts.
  2769  */
  2770 int gcisdigit(unsigned char c)
  2771 {   
  2772     return c>='0' && c<='9';
  2773 }
  2774 
  2775 /*
  2776  * gcisletter:
  2777  *
  2778  * A version of isletter() that doesn't get confused in 8-bit texts.
  2779  * NB: this is ISO-8891-1-specific.
  2780  */
  2781 int gcisletter(unsigned char c)
  2782 {   
  2783     return c>='A' && c<='Z' || c>='a' && c<='z' || c>=192;
  2784 }
  2785 
  2786 /*
  2787  * gcstrchr:
  2788  *
  2789  * Wraps strchr to return NULL if the character being searched for is zero.
  2790  */
  2791 char *gcstrchr(char *s,char c)
  2792 {
  2793     if (!c)
  2794 	return NULL;
  2795     return strchr(s,c);
  2796 }
  2797 
  2798 /*
  2799  * postprocess_for_DP:
  2800  *
  2801  * Invoked with the -d switch from flgets().
  2802  * It simply "removes" from the line a hard-coded set of common
  2803  * DP-specific tags, so that the line passed to the main routine has
  2804  * been pre-cleaned of DP markup.
  2805  */
  2806 void postprocess_for_DP(char *theline)
  2807 {
  2808     char *s,*t;
  2809     int i;
  2810     if (!*theline) 
  2811         return;
  2812     for (i=0;*DPmarkup[i];i++)
  2813     {
  2814         s=strstr(theline,DPmarkup[i]);
  2815         while (s)
  2816 	{
  2817             t=s+strlen(DPmarkup[i]);
  2818             while (*t)
  2819 	    {
  2820                 *s=*t;
  2821                 t++;
  2822 		s++;
  2823 	    }
  2824             *s=0;
  2825             s=strstr(theline,DPmarkup[i]);
  2826 	}
  2827     }
  2828 }
  2829 
  2830 /*
  2831  * postprocess_for_HTML:
  2832  *
  2833  * Invoked with the -m switch from flgets().
  2834  * It simply "removes" from the line a hard-coded set of common
  2835  * HTML tags and "replaces" a hard-coded set of common HTML
  2836  * entities, so that the line passed to the main routine has
  2837  * been pre-cleaned of HTML.
  2838  */
  2839 void postprocess_for_HTML(char *theline)
  2840 {
  2841     if (strstr(theline,"<") && strstr(theline,">"))
  2842         while (losemarkup(theline))
  2843             ;
  2844     while (loseentities(theline))
  2845         ;
  2846 }
  2847 
  2848 char *losemarkup(char *theline)
  2849 {
  2850     char *s,*t;
  2851     int i;
  2852     if (!*theline) 
  2853         return NULL;
  2854     s=strstr(theline,"<");
  2855     t=strstr(theline,">");
  2856     if (!s || !t)
  2857 	return NULL;
  2858     for (i=0;*markup[i];i++)
  2859         if (!tagcomp(s+1,markup[i]))
  2860 	{
  2861             if (!t[1])
  2862 	    {
  2863                 *s=0;
  2864                 return s;
  2865 	    }
  2866             else if (t>s)
  2867 	    {
  2868 		strcpy(s,t+1);
  2869 		return s;
  2870 	    }
  2871         }
  2872     /* It's an unrecognized <xxx>. */
  2873     return NULL;
  2874 }
  2875 
  2876 char *loseentities(char *theline)
  2877 {
  2878     int i;
  2879     char *s,*t;
  2880     if (!*theline) 
  2881         return NULL;
  2882     for (i=0;*entities[i].htmlent;i++)
  2883     {
  2884         s=strstr(theline,entities[i].htmlent);
  2885         if (s)
  2886 	{
  2887             t=malloc((size_t)strlen(s));
  2888             if (!t)
  2889 		return NULL;
  2890             strcpy(t,s+strlen(entities[i].htmlent));
  2891             strcpy(s,entities[i].textent);
  2892             strcat(s,t);
  2893             free(t);
  2894             return theline;
  2895 	}
  2896     }
  2897     for (i=0;*entities[i].htmlnum;i++)
  2898     {
  2899         s=strstr(theline,entities[i].htmlnum);
  2900         if (s)
  2901 	{
  2902             t=malloc((size_t)strlen(s));
  2903             if (!t)
  2904 		return NULL;
  2905             strcpy(t,s+strlen(entities[i].htmlnum));
  2906             strcpy(s,entities[i].textent);
  2907             strcat(s,t);
  2908             free(t);
  2909             return theline;
  2910 	}
  2911     }
  2912     return NULL;
  2913 }
  2914 
  2915 int tagcomp(char *strin,char *basetag)
  2916 {
  2917     char *s,*t;
  2918     s=basetag;
  2919     t=strin;
  2920     if (*t=='/')
  2921 	t++; /* ignore a slash */
  2922     while (*s && *t)
  2923     {
  2924         if (tolower(*s)!=tolower(*t))
  2925 	    return 1;
  2926         s++;
  2927 	t++;
  2928     }
  2929     return 0;
  2930 }
  2931 
  2932 void proghelp()
  2933 {
  2934     fputs("Bookloupe version " PACKAGE_VERSION ".\n",stderr);
  2935     fputs("Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
  2936     fputs("Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>.\n",stderr);
  2937     fputs("Bookloupe comes wih ABSOLUTELY NO WARRANTY. "
  2938       "For details, read the file COPYING.\n",stderr);
  2939     fputs("This is Free Software; "
  2940       "you may redistribute it under certain conditions (GPL);\n",stderr);
  2941     fputs("read the file COPYING for details.\n\n",stderr);
  2942     fputs("Usage is: bookloupe [-setpxloyhud] filename\n",stderr);
  2943     fputs("  where -s checks single quotes, -e suppresses echoing lines, "
  2944       "-t checks typos\n",stderr);
  2945     fputs("  -x (paranoid) switches OFF -t and extra checks, "
  2946       "-l turns OFF line-end checks\n",stderr);
  2947     fputs("  -o just displays overview without detail, "
  2948       "-h echoes header fields\n",stderr);
  2949     fputs("  -v (verbose) unsuppresses duplicate reporting, "
  2950       "-m suppresses markup\n",stderr);
  2951     fputs("  -d ignores DP-specific markup,\n",stderr);
  2952     fputs("  -u uses a file gutcheck.typ to query user-defined "
  2953       "possible typos\n",stderr);
  2954     fputs("Sample usage: bookloupe warpeace.txt \n",stderr);
  2955     fputs("\n",stderr);
  2956     fputs("Bookloupe looks for errors in Project Gutenberg(TM) etexts.\n",
  2957       stderr);
  2958     fputs("Bookloupe queries anything it thinks shouldn't be in a PG text; "
  2959       "non-ASCII\n",stderr);
  2960     fputs("characters like accented letters, "
  2961       "lines longer than 75 or shorter than 55,\n",stderr);
  2962     fputs("unbalanced quotes or brackets, "
  2963       "a variety of badly formatted punctuation, \n",stderr);
  2964     fputs("HTML tags, some likely typos. "
  2965       "It is NOT a substitute for human judgement.\n",stderr);
  2966     fputs("\n",stderr);
  2967 }