bookloupe/bookloupe.c
author ali <ali@juiblex.co.uk>
Sun May 26 16:39:48 2013 +0100 (2013-05-26)
changeset 54 23b2ea51b029
parent 53 4c8606eb60c1
child 55 6b786cc05b3c
permissions -rw-r--r--
Break check_for_following_punctuation() out
     1 /*************************************************************************/
     2 /* bookloupe--check for assorted weirdnesses in a PG candidate text file */
     3 /*                                                                       */
     4 /* Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>                  */
     5 /* Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>                     */
     6 /*                                                                       */
     7 /* This program is free software; you can redistribute it and/or modify  */
     8 /* it under the terms of the GNU General Public License as published by  */
     9 /* the Free Software Foundation; either version 2 of the License, or     */
    10 /* (at your option) any later version.                                   */
    11 /*                                                                       */
    12 /* This program is distributed in the hope that it will be useful,       */
    13 /* but WITHOUT ANY WARRANTY; without even the implied warranty of        */
    14 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          */
    15 /* GNU General Public License for more details.                          */
    16 /*                                                                       */
    17 /* You should have received a copy of the GNU General Public License     */
    18 /* along with this program. If not, see <http://www.gnu.org/licenses/>.  */
    19 /*************************************************************************/
    20 
    21 #include <stdio.h>
    22 #include <stdlib.h>
    23 #include <string.h>
    24 #include <ctype.h>
    25 
    26 #define MAXWORDLEN    80    /* max length of one word             */
    27 #define LINEBUFSIZE 2048    /* buffer size for an input line      */
    28 
    29 #define MAX_USER_TYPOS 1000            /* capacity of usertypo[]; main() ignores any further entries */
    30 #define USERTYPO_FILE "gutcheck.typ"   /* user typo list: main() tries the cwd, then the executable's directory */
    31 
    32 #ifndef MAX_PATH
    33 #define MAX_PATH 16384                 /* fallback when no MAX_PATH is already defined; sizes the path buffers */
    34 #endif
    35 
    36 char aline[LINEBUFSIZE];       /* shared line buffer: filled by main() (user typos) and first_pass() */
    37 char prevline[LINEBUFSIZE];    /* NOTE(review): presumably the previously read line - not used in this part of the file */
    38 
    39 /* Common typos (all lower case). The list ends with an empty-string sentinel. */
    40 char *typo[] = {
    41     "teh", "th", "og", "fi", "ro", "adn", "yuo", "ot", "fo", "thet", "ane",
    42     "nad", "te", "ig", "acn",  "ahve", "alot", "anbd", "andt", "awya", "aywa",
    43     "bakc", "om", "btu", "byt", "cna", "cxan", "coudl", "dont", "didnt",
    44     "couldnt", "wouldnt", "doesnt", "shouldnt", "doign", "ehr", "hmi", "hse",
    45     "esle", "eyt", "fitrs", "firts", "foudn", "frmo", "fromt", "fwe", "gaurd",
    46     "gerat", "goign", "gruop", "haev", "hda", "hearign", "seeign", "sayign",
    47     "herat", "hge", "hsa", "hsi", "hte", "htere", "htese", "htey", "htis",
    48     "hvae", "hwich", "idae", "ihs", "iits", "int", "iwll", "iwth", "jsut",
    49     "loev", "sefl", "myu", "nkow", "nver", "nwe", "nwo", "ocur", "ohter",
    50     "omre", "onyl", "otehr", "otu", "owrk", "owuld", "peice", "peices",
    51     "peolpe", "peopel", "perhasp", "perhpas", "pleasent", "poeple", "porblem",
    52     "porblems", "rwite", "saidt", "saidh", "saids", "seh", "smae", "smoe",
    53     "sohw", "stnad", "stopry", "stoyr", "stpo", "tahn", "taht", "tath",
    54     "tehy", "tghe", "tghis", "theri", "theyll", "thgat", "thge", "thier",
    55     "thna", "thne", "thnig", "thnigs", "thsi", "thsoe", "thta", "timne",
    56     "tirne", "tkae", "tthe", "tyhat", "tyhe", "veyr", "vou", "vour", "vrey",
    57     "waht", "wasnt", "awtn", "watn", "wehn", "whic", "whcih", "whihc", "whta",
    58     "wihch", "wief", "wiht", "witha", "wiull", "wnat", "wnated", "wnats",
    59     "woh", "wohle", "wokr", "woudl", "wriet", "wrod", "wroet", "wroking",
    60     "wtih", "wuould", "wya", "yera", "yeras", "yersa", "yoiu", "youve",
    61     "ytou", "yuor", "abead", "ahle", "ahout", "ahove", "altbough", "balf",
    62     "bardly", "bas", "bave", "baving", "bebind", "beld", "belp", "belped",
    63     "ber", "bere", "bim", "bis", "bome", "bouse", "bowever", "buge",
    64     "dehates", "deht", "han", "hecause", "hecome", "heen", "hefore", "hegan",
    65     "hegin", "heing", "helieve", "henefit", "hetter", "hetween", "heyond",
    66     "hig", "higber", "huild", "huy", "hy", "jobn", "joh", "meanwbile",
    67     "memher", "memhers", "numher", "numhers", "perbaps", "prohlem", "puhlic",
    68     "witbout", "arn", "hin", "hirn", "wrok", "wroked", "amd", "aud",
    69     "prornise", "prornised", "modem", "bo", "heside", "chapteb", "chaptee",
    70     "se", ""
    71 };
    72 
    73 char *usertypo[MAX_USER_TYPOS];    /* malloc'd copies of the user typo file's lines, filled by main(); usertypo_count entries are valid */
    74 
    75 /* Common abbreviations and other OK words not to query as typos (empty string ends the list). */
    76 char *okword[] = {
    77     "mr", "mrs", "mss", "mssrs", "ft", "pm", "st", "dr", "hmm", "h'm", "hmmm",
    78     "rd", "sh", "br", "pp", "hm", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd",
    79     "pompeii","hawaii","hawaiian", "hotbed", "heartbeat", "heartbeats",
    80     "outbid", "outbids", "frostbite", "frostbitten", ""
    81 };
    82 
    83 /* Common abbreviations that cause otherwise unexplained periods (empty string ends the list). */
    84 char *abbrev[] = {
    85     "cent", "cents", "viz", "vol", "vols", "vid", "ed", "al", "etc", "op",
    86     "cit", "deg", "min", "chap", "oz", "mme", "mlle", "mssrs", ""
    87 };
    88 
    89 /*
    90  * Two-Letter combinations that rarely if ever start words,
    91  * but are common scannos or otherwise common letter combinations.
    92  * The list is terminated by an empty string.
    93 char *nostart[] = {
    94     "hr", "hl", "cb", "sb", "tb", "wb", "tl", "tn", "rn", "lt", "tj", ""
    95 };
    96 
    97 /*
    98  * Two-Letter combinations that rarely if ever end words,
    99  * but are common scannos or otherwise common letter combinations.
   100  */
   101 char *noend[] = {    /* terminated by an empty string */
   102     "cb", "gb", "pb", "sb", "tb", "wh", "fr", "br", "qu", "tw", "gl", "fl",
   103     "sw", "gr", "sl", "cl", "iy", ""
   104 };
   105 
   106 char *markup[] = {    /* HTML tag names, without angle brackets; empty string ends the list */
   107     "a", "b", "big", "blockquote", "body", "br", "center", "col", "div", "em",
   108     "font", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i",
   109     "img", "li", "meta", "ol", "p", "pre", "small", "span", "strong", "sub",
   110     "sup", "table", "td", "tfoot", "thead", "title", "tr", "tt", "u", "ul", ""
   111 };
   112 
   113 char *DPmarkup[] = {    /* DP-specific markup strings (cf. the D switch); empty string ends the list */
   114     "<sc>", "</sc>", "/*", "*/", "/#", "#/", "/$", "$/", "<tb>", ""
   115 };
   116 
   117 char *nocomma[] = {    /* NOTE(review): presumably words that should not be followed by a comma - confirm against the checking code; "mrs" appears twice; empty string ends the list */
   118     "the", "it's", "their", "an", "mrs", "a", "our", "that's", "its", "whose",
   119     "every", "i'll", "your", "my", "mr", "mrs", "mss", "mssrs", "ft", "pm",
   120     "st", "dr", "rd", "pp", "cf", "jr", "sr", "vs", "lb", "lbs", "ltd", "i'm",
   121     "during", "let", "toward", "among", ""
   122 };
   123 
   124 char *noperiod[] = {    /* NOTE(review): presumably words that should not be followed by a period - confirm against the checking code; empty string ends the list */
   125     "every", "i'm", "during", "that's", "their", "your", "our", "my", "or",
   126     "and", "but", "as", "if", "the", "its", "it's", "until", "than", "whether",
   127     "i'll", "whose", "who", "because", "when", "let", "till", "very", "an",
   128     "among", "those", "into", "whom", "having", "thence", ""
   129 }; 
   130 
   131 char vowels[] = "aeiouàáâãäæèéêëìíîïòóôõöùúûü";    /* lower-case vowels incl. accented forms; NOTE(review): the non-ASCII bytes depend on the source file's encoding (presumably one byte each, Latin-1) - confirm */
   132 
   133 struct {    /* table of HTML character entities and plain-text stand-ins for them */
   134     char *htmlent;    /* named form, e.g. "&amp;" */
   135     char *htmlnum;    /* numeric form, e.g. "&#38;" */
   136     char *textent;    /* plain-text replacement, e.g. "&" */
   137 } entities[] = {
   138     "&amp;",	"&#38;",     "&",  /* ampersand */
   139     "&lt;",	"&#60;",     "<", /* less-than sign */
   140     "&gt;",	"&#62;",     ">", /* greater-than sign */
   141     "&deg;",	"&#176;",    " degrees", /* degree sign (listed again further down) */
   142     "&pound;",	"&#163;",    "L", /* pound sign (listed again further down) */
   143     "&quot;",	"&#34;",     "\"", /* quotation mark = APL quote */
   144     "&OElig;",	"&#338;",    "OE", /* latin capital ligature OE */
   145     "&oelig;",	"&#339;",    "oe", /* latin small ligature oe */
   146     "&Scaron;",	"&#352;",    "S", /* latin capital letter S with caron */
   147     "&scaron;",	"&#353;",    "s", /* latin small letter s with caron */
   148     "&Yuml;",	"&#376;",    "Y", /* latin capital letter Y with diaeresis */
   149     "&circ;",	"&#710;",    "",  /* modifier letter circumflex accent */
   150     "&tilde;",	"&#732;",    "~", /* small tilde, U+02DC ISOdia */
   151     "&ensp;",	"&#8194;",   " ", /* en space, U+2002 ISOpub */
   152     "&emsp;",	"&#8195;",   " ", /* em space, U+2003 ISOpub */
   153     "&thinsp;",	"&#8201;",   " ", /* thin space, U+2009 ISOpub */
   154     "&ndash;",	"&#8211;",   "-", /* en dash, U+2013 ISOpub */
   155     "&mdash;",	"&#8212;",   "--", /* em dash, U+2014 ISOpub */
   156     "&rsquo;",	"&#8217;",   "'", /* right single quotation mark */
   157     "&sbquo;",	"&#8218;",   "'", /* single low-9 quotation mark */
   158     "&ldquo;",	"&#8220;",   "\"", /* left double quotation mark */
   159     "&rdquo;",	"&#8221;",   "\"", /* right double quotation mark */
   160     "&bdquo;",	"&#8222;",   "\"", /* double low-9 quotation mark */
   161     "&lsaquo;",	"&#8249;",   "\"", /* single left-pointing angle quotation mark */
   162     "&rsaquo;",	"&#8250;",   "\"", /* single right-pointing angle quotation mark */
   163     "&nbsp;",	"&#160;",    " ", /* no-break space = non-breaking space, */
   164     "&iexcl;",	"&#161;",    "!", /* inverted exclamation mark */
   165     "&cent;",	"&#162;",    "c", /* cent sign */
   166     "&pound;",	"&#163;",    "L", /* pound sign */
   167     "&curren;",	"&#164;",    "$", /* currency sign */
   168     "&yen;",	"&#165;",    "Y", /* yen sign = yuan sign */
   169     "&sect;",	"&#167;",    "--", /* section sign */
   170     "&uml;",	"&#168;",    " ", /* diaeresis = spacing diaeresis */
   171     "&copy;",	"&#169;",    "(C) ", /* copyright sign */
   172     "&ordf;",	"&#170;",    " ", /* feminine ordinal indicator */
   173     "&laquo;",	"&#171;",    "\"", /* left-pointing double angle quotation mark */
   174     "&shy;",	"&#173;",    "-", /* soft hyphen = discretionary hyphen */
   175     "&reg;",	"&#174;",    "(R) ", /* registered sign = registered trade mark sign */
   176     "&macr;",	"&#175;",    " ", /* macron = spacing macron = overline */
   177     "&deg;",	"&#176;",    " degrees", /* degree sign */
   178     "&plusmn;",	"&#177;",    "+-", /* plus-minus sign = plus-or-minus sign */
   179     "&sup2;",	"&#178;",    "2", /* superscript two = superscript digit two */
   180     "&sup3;",	"&#179;",    "3", /* superscript three = superscript digit three */
   181     "&acute;",	"&#180;",    " ", /* acute accent = spacing acute */
   182     "&micro;",	"&#181;",    "m", /* micro sign */
   183     "&para;",	"&#182;",    "--", /* pilcrow sign = paragraph sign */
   184     "&cedil;",	"&#184;",    " ", /* cedilla = spacing cedilla */
   185     "&sup1;",	"&#185;",    "1", /* superscript one = superscript digit one */
   186     "&ordm;",	"&#186;",    " ", /* masculine ordinal indicator */
   187     "&raquo;",	"&#187;",    "\"", /* right-pointing double angle quotation mark */
   188     "&frac14;",	"&#188;",    "1/4", /* vulgar fraction one quarter */
   189     "&frac12;",	"&#189;",    "1/2", /* vulgar fraction one half */
   190     "&frac34;",	"&#190;",    "3/4", /* vulgar fraction three quarters */
   191     "&iquest;",	"&#191;",    "?", /* inverted question mark */
   192     "&Agrave;",	"&#192;",    "A", /* latin capital letter A with grave */
   193     "&Aacute;",	"&#193;",    "A", /* latin capital letter A with acute */
   194     "&Acirc;",	"&#194;",    "A", /* latin capital letter A with circumflex */
   195     "&Atilde;",	"&#195;",    "A", /* latin capital letter A with tilde */
   196     "&Auml;",	"&#196;",    "A", /* latin capital letter A with diaeresis */
   197     "&Aring;",	"&#197;",    "A", /* latin capital letter A with ring above */
   198     "&AElig;",	"&#198;",    "AE", /* latin capital letter AE */
   199     "&Ccedil;",	"&#199;",    "C", /* latin capital letter C with cedilla */
   200     "&Egrave;",	"&#200;",    "E", /* latin capital letter E with grave */
   201     "&Eacute;",	"&#201;",    "E", /* latin capital letter E with acute */
   202     "&Ecirc;",	"&#202;",    "E", /* latin capital letter E with circumflex */
   203     "&Euml;",	"&#203;",    "E", /* latin capital letter E with diaeresis */
   204     "&Igrave;",	"&#204;",    "I", /* latin capital letter I with grave */
   205     "&Iacute;",	"&#205;",    "I", /* latin capital letter I with acute */
   206     "&Icirc;",	"&#206;",    "I", /* latin capital letter I with circumflex */
   207     "&Iuml;",	"&#207;",    "I", /* latin capital letter I with diaeresis */
   208     "&ETH;",	"&#208;",    "E", /* latin capital letter ETH */
   209     "&Ntilde;",	"&#209;",    "N", /* latin capital letter N with tilde */
   210     "&Ograve;",	"&#210;",    "O", /* latin capital letter O with grave */
   211     "&Oacute;",	"&#211;",    "O", /* latin capital letter O with acute */
   212     "&Ocirc;",	"&#212;",    "O", /* latin capital letter O with circumflex */
   213     "&Otilde;",	"&#213;",    "O", /* latin capital letter O with tilde */
   214     "&Ouml;",	"&#214;",    "O", /* latin capital letter O with diaeresis */
   215     "&times;",	"&#215;",    "*", /* multiplication sign */
   216     "&Oslash;",	"&#216;",    "O", /* latin capital letter O with stroke */
   217     "&Ugrave;",	"&#217;",    "U", /* latin capital letter U with grave */
   218     "&Uacute;",	"&#218;",    "U", /* latin capital letter U with acute */
   219     "&Ucirc;",	"&#219;",    "U", /* latin capital letter U with circumflex */
   220     "&Uuml;",	"&#220;",    "U", /* latin capital letter U with diaeresis */
   221     "&Yacute;",	"&#221;",    "Y", /* latin capital letter Y with acute */
   222     "&THORN;",	"&#222;",    "TH", /* latin capital letter THORN */
   223     "&szlig;",	"&#223;",    "sz", /* latin small letter sharp s = ess-zed */
   224     "&agrave;",	"&#224;",    "a", /* latin small letter a with grave */
   225     "&aacute;",	"&#225;",    "a", /* latin small letter a with acute */
   226     "&acirc;",	"&#226;",    "a", /* latin small letter a with circumflex */
   227     "&atilde;",	"&#227;",    "a", /* latin small letter a with tilde */
   228     "&auml;",	"&#228;",    "a", /* latin small letter a with diaeresis */
   229     "&aring;",	"&#229;",    "a", /* latin small letter a with ring above */
   230     "&aelig;",	"&#230;",    "ae", /* latin small letter ae */
   231     "&ccedil;",	"&#231;",    "c", /* latin small letter c with cedilla */
   232     "&egrave;",	"&#232;",    "e", /* latin small letter e with grave */
   233     "&eacute;",	"&#233;",    "e", /* latin small letter e with acute */
   234     "&ecirc;",	"&#234;",    "e", /* latin small letter e with circumflex */
   235     "&euml;",	"&#235;",    "e", /* latin small letter e with diaeresis */
   236     "&igrave;",	"&#236;",    "i", /* latin small letter i with grave */
   237     "&iacute;",	"&#237;",    "i", /* latin small letter i with acute */
   238     "&icirc;",	"&#238;",    "i", /* latin small letter i with circumflex */
   239     "&iuml;",	"&#239;",    "i", /* latin small letter i with diaeresis */
   240     "&eth;",	"&#240;",    "eth", /* latin small letter eth */
   241     "&ntilde;",	"&#241;",    "n", /* latin small letter n with tilde */
   242     "&ograve;",	"&#242;",    "o", /* latin small letter o with grave */
   243     "&oacute;",	"&#243;",    "o", /* latin small letter o with acute */
   244     "&ocirc;",	"&#244;",    "o", /* latin small letter o with circumflex */
   245     "&otilde;",	"&#245;",    "o", /* latin small letter o with tilde */
   246     "&ouml;",	"&#246;",    "o", /* latin small letter o with diaeresis */
   247     "&divide;",	"&#247;",    "/", /* division sign */
   248     "&oslash;",	"&#248;",    "o", /* latin small letter o with stroke */
   249     "&ugrave;",	"&#249;",    "u", /* latin small letter u with grave */
   250     "&uacute;",	"&#250;",    "u", /* latin small letter u with acute */
   251     "&ucirc;",	"&#251;",    "u", /* latin small letter u with circumflex */
   252     "&uuml;",	"&#252;",    "u", /* latin small letter u with diaeresis */
   253     "&yacute;",	"&#253;",    "y", /* latin small letter y with acute */
   254     "&thorn;",	"&#254;",    "th", /* latin small letter thorn */
   255     "&yuml;",	"&#255;",    "y", /* latin small letter y with diaeresis */
   256     "", ""    /* end marker: empty htmlent and htmlnum; textent is left as a null pointer */
   257 };
   258 
   259 /* special characters (given as ASCII codes or character constants) */
   260 #define CHAR_SPACE        32    /* ' '  */
   261 #define CHAR_TAB           9    /* '\t' */
   262 #define CHAR_LF           10    /* '\n' */
   263 #define CHAR_CR           13    /* '\r' */
   264 #define CHAR_DQUOTE       34    /* '"'  */
   265 #define CHAR_SQUOTE       39    /* '\'' */
   266 #define CHAR_OPEN_SQUOTE  96    /* '`'  */
   267 #define CHAR_TILDE       126    /* '~'  */
   268 #define CHAR_ASTERISK     42    /* '*'  */
   269 #define CHAR_FORESLASH    47    /* '/'  */
   270 #define CHAR_CARAT        94    /* '^'  */
   271 
   272 #define CHAR_UNDERSCORE    '_'
   273 #define CHAR_OPEN_CBRACK   '{'
   274 #define CHAR_CLOSE_CBRACK  '}'
   275 #define CHAR_OPEN_RBRACK   '('
   276 #define CHAR_CLOSE_RBRACK  ')'
   277 #define CHAR_OPEN_SBRACK   '['
   278 #define CHAR_CLOSE_SBRACK  ']'
   279 
   280 /* longest and shortest normal PG line lengths */
   281 #define LONGEST_PG_LINE   75    /* first_pass() counts longer lines in longline */
   282 #define WAY_TOO_LONG      80    /* first_pass() counts longer lines in verylongline */
   283 #define SHORTEST_PG_LINE  55    /* threshold used by first_pass() when counting short lines */
   284 
   285 #define SWITCHES "ESTPXLOYHWVMUD" /* switches:-                            */
   286                                   /*     D - ignore DP-specific markup     */
   287                                   /*     E - echo queried line             */
   288                                   /*     S - check single quotes           */
   289                                   /*     T - check common typos            */
   290                                   /*     P - require closure of quotes on  */
   291                                   /*         every paragraph               */
   292                                   /*     X - "Trust no one" :-) Paranoid!  */
   293                                   /*         Queries everything            */
   294                                   /*     L - line end checking defaults on */
   295                                   /*         -L turns it off               */
   296                                   /*     O - overview. Just shows counts.  */
   297                                   /*     Y - puts errors to stdout         */
   298                                   /*         instead of stderr             */
   299                                   /*     H - Echoes header fields          */
   300                                   /*     M - Ignore markup in < >          */
   301                                   /*     U - Use file of User-defined Typos*/
   302                                   /*     W - Defaults for use on Web upload*/
   303                                   /*     V - Verbose - list EVERYTHING!    */
       /*
        * main() sets pswit[i] when the letter SWITCHES[i] is given, so the
        * position of each letter in SWITCHES must equal the matching
        * *_SWITCH index below, and SWITNO must be at least strlen(SWITCHES).
        * main() then inverts the E, X and L entries (and T when paranoid
        * mode ends up on), so for those letters the switch turns the
        * feature off rather than on.
        */
   304 #define SWITNO 14                 /* max number of switch parms            */
   305                                   /*        - used for defining array-size */
   306 #define MINARGS   1               /* minimum no of args excl switches      */
   307 #define MAXARGS   1               /* maximum no of args excl switches      */
   308 
   309 int pswit[SWITNO];                /* program switches set by SWITCHES      */
   310 
   311 #define ECHO_SWITCH      0        /* 'E' */
   312 #define SQUOTE_SWITCH    1        /* 'S' */
   313 #define TYPO_SWITCH      2        /* 'T' */
   314 #define QPARA_SWITCH     3        /* 'P' */
   315 #define PARANOID_SWITCH  4        /* 'X' */
   316 #define LINE_END_SWITCH  5        /* 'L' */
   317 #define OVERVIEW_SWITCH  6        /* 'O' */
   318 #define STDOUT_SWITCH    7        /* 'Y' */
   319 #define HEADER_SWITCH    8        /* 'H' */
   320 #define WEB_SWITCH       9        /* 'W' */
   321 #define VERBOSE_SWITCH   10       /* 'V' */
   322 #define MARKUP_SWITCH    11       /* 'M' */
   323 #define USERTYPO_SWITCH  12       /* 'U' */
   324 #define DP_SWITCH        13       /* 'D' */
   325 
   326 long cnt_dquot;       /* for overview mode, count of doublequote queries */
   327 long cnt_squot;       /* for overview mode, count of singlequote queries */
   328 long cnt_brack;       /* for overview mode, count of brackets queries */
   329 long cnt_bin;         /* for overview mode, count of non-ASCII queries */
   330 long cnt_odd;         /* for overview mode, count of odd character queries */
   331 long cnt_long;        /* for overview mode, count of long line errors */
   332 long cnt_short;       /* for overview mode, count of short line queries */
   333 long cnt_punct;       /* for overview mode, count of punctuation and spacing queries */
   334 long cnt_dash;        /* for overview mode, count of dash-related queries */
   335 long cnt_word;        /* for overview mode, count of word queries */
   336 long cnt_html;        /* for overview mode, count of html queries */
   337 long cnt_lineend;     /* for overview mode, count of line-end queries */
   338 long cnt_spacend;     /* count of lines with space at end; reset by main() but not part of its TOTAL QUERIES sum */
   339 long linecnt;         /* count of total lines in the file (incremented by first_pass() per line read) */
   340 long checked_linecnt; /* count of lines actually checked */
   341 
   342 void proghelp(void);      /* called by main() when the argument count is wrong */
   343 void procfile(char *);    /* called by main() with the name of the file to check */
   344 
   345 #define LOW_THRESHOLD    0
   346 #define HIGH_THRESHOLD   1
   347 
   348 #define START 0
   349 #define END 1
   350 #define PREV 0
   351 #define NEXT 1
   352 #define FIRST_OF_PAIR 0
   353 #define SECOND_OF_PAIR 1
   354 
   355 #define MAX_WORDPAIR 1000
   356 
   357 char running_from[MAX_PATH];    /* set by main(): directory part of argv[0], keeping the trailing separator (empty if none) */
   358 
   359 int mixdigit(char *);
   360 const char *getaword(const char *,char *);
   361 int matchword(char *,char *);
   362 char *flgets(char *,int,FILE *,long);    /* line reader; main() uses it to read the user typo file */
   363 void lowerit(char *);
   364 int gcisalpha(unsigned char);
   365 int gcisdigit(unsigned char);
   366 int gcisletter(unsigned char);
   367 char *gcstrchr(char *s,char c);
   368 void postprocess_for_HTML(char *);
   369 char *linehasmarkup(char *);
   370 char *losemarkup(char *);
   371 int tagcomp(char *,char *);
   372 char *loseentities(char *);
   373 int isroman(char *);
   374 int usertypo_count;             /* number of entries main() has stored in usertypo[] */
   375 void postprocess_for_DP(char *);
   376 
   377 char wrk[LINEBUFSIZE];          /* NOTE(review): presumably a scratch line buffer - not used in this part of the file */
   378 
   379 #define MAX_QWORD 50
   380 #define MAX_QWORD_LENGTH 40
   381 char qword[MAX_QWORD][MAX_QWORD_LENGTH];
   382 signed int dupcnt[MAX_QWORD];
   383 
   384 int main(int argc,char **argv)
   385 {
   386     char *argsw,*s;
   387     int i,switno,invarg;
   388     char usertypo_file[MAX_PATH];
   389     FILE *usertypofile;
   390     if (strlen(argv[0])<sizeof(running_from))
   391 	/* save the path to the executable */
   392         strcpy(running_from,argv[0]);
   393     /* find out what directory we're running from */
   394     s=running_from+strlen(running_from);
   395     for (;*s!='/' && *s!='\\' && s>=running_from;s--)
   396         *s=0;
   397     switno=strlen(SWITCHES);
   398     for (i=switno;--i>0;)
   399         pswit[i]=0;           /* initialise switches */
   400     /*
   401      * Standard loop to extract switches.
   402      * When we come out of this loop, the arguments will be
   403      * in argv[0] upwards and the switches used will be
   404      * represented by their equivalent elements in pswit[]
   405      */
   406     while (--argc>0 && **++argv=='-')
   407         for (argsw=argv[0]+1;*argsw!='\0';argsw++)
   408             for (i=switno,invarg=1;(--i>=0) && invarg==1;)
   409                 if ((toupper(*argsw))==SWITCHES[i])
   410 		{
   411                     invarg=0;
   412                     pswit[i]=1;
   413 		}
   414     /* Paranoid checking is turned OFF, not on, by its switch */
   415     pswit[PARANOID_SWITCH]^=1;
   416     if (pswit[PARANOID_SWITCH])
   417 	/* if running in paranoid mode force typo checks as well   */
   418         pswit[TYPO_SWITCH]=pswit[TYPO_SWITCH]^1;
   419     /* Line-end checking is turned OFF, not on, by its switch */
   420     pswit[LINE_END_SWITCH]^=1;
   421     /* Echoing is turned OFF, not on, by its switch */
   422     pswit[ECHO_SWITCH]^=1;
   423     if (pswit[OVERVIEW_SWITCH])
   424 	/* just print summary; don't echo */
   425         pswit[ECHO_SWITCH]=0;
   426     /*
   427      * Web uploads - for the moment, this is really just a placeholder
   428      * until we decide what processing we really want to do on web uploads
   429      */
   430     if (pswit[WEB_SWITCH])
   431     {
   432 	/* specific override for web uploads */
   433         pswit[ECHO_SWITCH]=1;
   434         pswit[SQUOTE_SWITCH]=0;
   435         pswit[TYPO_SWITCH]=1;
   436         pswit[QPARA_SWITCH]=0;
   437         pswit[PARANOID_SWITCH]=1;
   438         pswit[LINE_END_SWITCH]=0;
   439         pswit[OVERVIEW_SWITCH]=0;
   440         pswit[STDOUT_SWITCH]=0;
   441         pswit[HEADER_SWITCH]=1;
   442         pswit[VERBOSE_SWITCH]=0;
   443         pswit[MARKUP_SWITCH]=0;
   444         pswit[USERTYPO_SWITCH]=0;
   445         pswit[DP_SWITCH]=0;
   446     }
   447     if (argc<MINARGS || argc>MAXARGS)
   448     {
   449 	/* check number of args */
   450         proghelp();
   451         return 1;
   452     }
   453     /* read in the user-defined stealth scanno list */
   454     if (pswit[USERTYPO_SWITCH])
   455     {
   456 	/* ... we were told we had one! */
   457         usertypofile=fopen(USERTYPO_FILE,"rb");
   458         if (!usertypofile)
   459 	{
   460 	    /* not in cwd. try excuteable directory. */
   461             strcpy(usertypo_file,running_from);
   462             strcat(usertypo_file,USERTYPO_FILE);
   463             usertypofile=fopen(usertypo_file,"rb");
   464             if (!usertypofile) {
   465 		/* we ain't got no user typo file! */
   466                 printf("   --> I couldn't find gutcheck.typ "
   467 		  "-- proceeding without user typos.\n");
   468 	    }
   469 	}
   470         usertypo_count=0;
   471         if (usertypofile)
   472 	{
   473 	    /* we managed to open a User Typo File! */
   474             if (pswit[USERTYPO_SWITCH])
   475 	    {
   476                 while (flgets(aline,LINEBUFSIZE-1,usertypofile,
   477 		  (long)usertypo_count))
   478 		{
   479                     if (strlen(aline)>1)
   480 		    {
   481                         if ((int)*aline>33)
   482 			{
   483                             s=malloc(strlen(aline)+1);
   484                             if (!s)
   485 			    {
   486                                 fprintf(stderr,"bookloupe: cannot get enough "
   487 				  "memory for user typo file!\n");
   488                                 exit(1);
   489 			    }
   490                             strcpy(s,aline);
   491                             usertypo[usertypo_count]=s;
   492                             usertypo_count++;
   493                             if (usertypo_count>=MAX_USER_TYPOS)
   494 			    {
   495                                 printf("   --> Only %d user-defined typos "
   496 				  "allowed: ignoring the rest\n",
   497 				  MAX_USER_TYPOS);
   498                                 break;
   499 			    }
   500 			}
   501 		    }
   502 		}
   503 	    }
   504             fclose(usertypofile);
   505 	}
   506     }
   507     fprintf(stderr,"bookloupe: Check and report on an e-text\n");
   508     cnt_dquot=cnt_squot=cnt_brack=cnt_bin=cnt_odd=cnt_long=
   509     cnt_short=cnt_punct=cnt_dash=cnt_word=cnt_html=cnt_lineend=
   510     cnt_spacend=0;
   511     procfile(argv[0]);
   512     if (pswit[OVERVIEW_SWITCH])
   513     {
   514 	printf("    Checked %ld lines of %ld (head+foot = %ld)\n\n",
   515 	  checked_linecnt,linecnt,linecnt-checked_linecnt);
   516         printf("    --------------- Queries found --------------\n");
   517         if (cnt_long)
   518 	    printf("    Long lines:                    %14ld\n",cnt_long);
   519         if (cnt_short)
   520 	    printf("    Short lines:                   %14ld\n",cnt_short);
   521         if (cnt_lineend)
   522 	    printf("    Line-end problems:             %14ld\n",cnt_lineend);
   523         if (cnt_word)
   524 	    printf("    Common typos:                  %14ld\n",cnt_word);
   525         if (cnt_dquot)
   526 	    printf("    Unmatched quotes:              %14ld\n",cnt_dquot);
   527         if (cnt_squot)
   528 	    printf("    Unmatched SingleQuotes:        %14ld\n",cnt_squot);
   529         if (cnt_brack)
   530 	    printf("    Unmatched brackets:            %14ld\n",cnt_brack);
   531         if (cnt_bin)
   532 	    printf("    Non-ASCII characters:          %14ld\n",cnt_bin);
   533         if (cnt_odd)
   534 	    printf("    Proofing characters:           %14ld\n",cnt_odd);
   535         if (cnt_punct)
   536 	    printf("    Punctuation & spacing queries: %14ld\n",cnt_punct);
   537         if (cnt_dash)
   538 	    printf("    Non-standard dashes:           %14ld\n",cnt_dash);
   539         if (cnt_html)
   540 	    printf("    Possible HTML tags:            %14ld\n",cnt_html);
   541         printf("\n");
   542         printf("    TOTAL QUERIES                  %14ld\n",
   543           cnt_dquot+cnt_squot+cnt_brack+cnt_bin+cnt_odd+cnt_long+
   544           cnt_short+cnt_punct+cnt_dash+cnt_word+cnt_html+cnt_lineend);
   545     }
   546     return 0;
   547 }
   548 
   549 struct first_pass_results {
   550     long firstline,astline;
   551     long footerline,totlen,binlen,alphalen,endquote_count,shortline,dotcomma;
   552     long fslashline,hyphens,longline,verylongline,htmcount,standalone_digit;
   553     long spacedash,emdash,space_emdash,non_PG_space_emdash,PG_space_emdash;
   554     signed int Dutchcount,Frenchcount;
   555 };
   556 
   557 /*
   558  * first_pass:
   559  *
   560  * Run a first pass - verify that it's a valid PG
   561  * file, decide whether to report some things that
   562  * occur many times in the text like long or short
   563  * lines, non-standard dashes, etc.
   564  */
   565 struct first_pass_results *first_pass(FILE *infile)
   566 {
   567     char laststart=CHAR_SPACE;
   568     const char *s;
   569     signed int i,llen;
   570     unsigned int lastlen=0,lastblen=0;
   571     long spline=0,nspline=0;
   572     static struct first_pass_results results={0};
   573     char inword[MAXWORDLEN]="";
   574     while (fgets(aline,LINEBUFSIZE-1,infile))
   575     {
   576         while (aline[strlen(aline)-1]==10 || aline[strlen(aline)-1]==13)
   577 	    aline[strlen(aline)-1]=0;
   578         linecnt++;
   579         if (strstr(aline,"*END") && strstr(aline,"SMALL PRINT") &&
   580 	  (strstr(aline,"PUBLIC DOMAIN") || strstr(aline,"COPYRIGHT")))
   581 	{
   582             if (spline)
   583                 printf("   --> Duplicate header?\n");
   584             spline=linecnt+1;   /* first line of non-header text, that is */
   585 	}
   586         if (!strncmp(aline,"*** START",9) && strstr(aline,"PROJECT GUTENBERG"))
   587 	{
   588             if (nspline)
   589                 printf("   --> Duplicate header?\n");
   590             nspline=linecnt+1;   /* first line of non-header text, that is */
   591 	}
   592         if (spline || nspline)
   593 	{
   594             lowerit(aline);
   595             if (strstr(aline,"end") && strstr(aline,"project gutenberg"))
   596 	    {
   597                 if (strstr(aline,"end")<strstr(aline,"project gutenberg"))
   598 		{
   599                     if (results.footerline)
   600 		    {
   601 			/* it's an old-form header - we can detect duplicates */
   602                         if (!nspline)
   603                             printf("   --> Duplicate footer?\n");
   604 		    }
   605                     else
   606                         results.footerline=linecnt;
   607 		}
   608 	    }
   609 	}
   610         if (spline)
   611 	    results.firstline=spline;
   612         if (nspline)
   613 	    results.firstline=nspline;  /* override with new */
   614         if (results.footerline)
   615 	    continue;    /* don't count the boilerplate in the footer */
   616         llen=strlen(aline);
   617         results.totlen+=llen;
   618         for (i=0;i<llen;i++)
   619 	{
   620             if ((unsigned char)aline[i]>127)
   621 		results.binlen++;
   622             if (gcisalpha(aline[i]))
   623 		results.alphalen++;
   624             if (i>0 && aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
   625 		results.endquote_count++;
   626 	}
   627         if (strlen(aline)>2 && lastlen>2 && lastlen<SHORTEST_PG_LINE &&
   628 	  lastblen>2 && lastblen>SHORTEST_PG_LINE && laststart!=CHAR_SPACE)
   629 	    results.shortline++;
   630         if (*aline && (unsigned char)aline[strlen(aline)-1]<=CHAR_SPACE)
   631 	    cnt_spacend++;
   632         if (strstr(aline,".,"))
   633 	    results.dotcomma++;
   634         /* only count ast lines for ignoring purposes where there is */
   635         /* locase text on the line */
   636         if (strstr(aline,"*"))
   637 	{
   638             for (s=aline;*s;s++)
   639                 if (*s>='a' && *s<='z')
   640                     break;
   641              if (*s)
   642 		results.astline++;
   643 	}
   644         if (strstr(aline,"/"))
   645             results.fslashline++;
   646         for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
   647 	    ;
   648         if (aline[i]=='-' && aline[i-1]!='-')
   649 	    results.hyphens++;
   650         if (llen>LONGEST_PG_LINE)
   651 	    results.longline++;
   652         if (llen>WAY_TOO_LONG)
   653 	    results.verylongline++;
   654         if (strstr(aline,"<") && strstr(aline,">"))
   655 	{
   656             i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);
   657             if (i>0)
   658                 results.htmcount++;
   659             if (strstr(aline,"<i>"))
   660 		results.htmcount+=4; /* bonus marks! */
   661 	}
   662         /* Check for spaced em-dashes */
   663         if (strstr(aline,"--"))
   664 	{
   665             results.emdash++;
   666             if (*(strstr(aline,"--")-1)==CHAR_SPACE ||
   667                (*(strstr(aline,"--")+2)==CHAR_SPACE))
   668 		results.space_emdash++;
   669             if (*(strstr(aline,"--")-1)==CHAR_SPACE &&
   670                (*(strstr(aline,"--")+2)==CHAR_SPACE))
   671 		/* count of em-dashes with spaces both sides */
   672 		results.non_PG_space_emdash++;
   673             if (*(strstr(aline,"--")-1)!=CHAR_SPACE &&
   674                (*(strstr(aline,"--")+2)!=CHAR_SPACE))
   675 		/* count of PG-type em-dashes with no spaces */
   676 		results.PG_space_emdash++;
   677 	}
   678         for (s=aline;*s;)
   679 	{
   680             s=getaword(s,inword);
   681             if (!strcmp(inword,"hij") || !strcmp(inword,"niet")) 
   682                 results.Dutchcount++;
   683             if (!strcmp(inword,"dans") || !strcmp(inword,"avec")) 
   684                 results.Frenchcount++;
   685             if (!strcmp(inword,"0") || !strcmp(inword,"1")) 
   686                 results.standalone_digit++;
   687 	}
   688         /* Check for spaced dashes */
   689         if (strstr(aline," -") && *(strstr(aline," -")+2)!='-')
   690 	    results.spacedash++;
   691         lastblen=lastlen;
   692         lastlen=strlen(aline);
   693         laststart=aline[0];
   694     }
   695     return &results;
   696 }
   697 
   698 struct warnings {
   699     signed int shortline,longline,bin,dash,dotcomma,ast,fslash,digit,hyphen;
   700     signed int endquote,isDutch,isFrench;
   701 };
   702 
   703 /*
   704  * report_first_pass:
   705  *
   706  * Make some snap decisions based on the first pass results.
   707  */
   708 struct warnings *report_first_pass(struct first_pass_results *results)
   709 {
   710     static struct warnings warnings={0};
   711     if (cnt_spacend>0)
   712         printf("   --> %ld lines in this file have white space at end\n",
   713 	  cnt_spacend);
   714     warnings.dotcomma=1;
   715     if (results->dotcomma>5)
   716     {
   717         warnings.dotcomma=0;
   718         printf("   --> %ld lines in this file contain '.,'. "
   719 	  "Not reporting them.\n",results->dotcomma);
   720     }
   721     /*
   722      * If more than 50 lines, or one-tenth, are short,
   723      * don't bother reporting them.
   724      */
   725     warnings.shortline=1;
   726     if (results->shortline>50 || results->shortline*10>linecnt)
   727     {
   728         warnings.shortline=0;
   729         printf("   --> %ld lines in this file are short. "
   730 	  "Not reporting short lines.\n",results->shortline);
   731     }
   732     /*
   733      * If more than 50 lines, or one-tenth, are long,
   734      * don't bother reporting them.
   735      */
   736     warnings.longline=1;
   737     if (results->longline>50 || results->longline*10>linecnt)
   738     {
   739         warnings.longline=0;
   740         printf("   --> %ld lines in this file are long. "
   741 	  "Not reporting long lines.\n",results->longline);
   742     }
   743     /* If more than 10 lines contain asterisks, don't bother reporting them. */
   744     warnings.ast=1;
   745     if (results->astline>10)
   746     {
   747         warnings.ast=0;
   748         printf("   --> %ld lines in this file contain asterisks. "
   749 	  "Not reporting them.\n",results->astline);
   750     }
   751     /*
   752      * If more than 10 lines contain forward slashes,
   753      * don't bother reporting them.
   754      */
   755     warnings.fslash=1;
   756     if (results->fslashline>10)
   757     {
   758         warnings.fslash=0;
   759         printf("   --> %ld lines in this file contain forward slashes. "
   760 	  "Not reporting them.\n",results->fslashline);
   761     }
   762     /*
   763      * If more than 20 lines contain unpunctuated endquotes,
   764      * don't bother reporting them.
   765      */
   766     warnings.endquote=1;
   767     if (results->endquote_count>20)
   768     {
   769         warnings.endquote=0;
   770         printf("   --> %ld lines in this file contain unpunctuated endquotes. "
   771 	  "Not reporting them.\n",results->endquote_count);
   772     }
   773     /*
   774      * If more than 15 lines contain standalone digits,
   775      * don't bother reporting them.
   776      */
   777     warnings.digit=1;
   778     if (results->standalone_digit>10)
   779     {
   780         warnings.digit=0;
   781         printf("   --> %ld lines in this file contain standalone 0s and 1s. "
   782 	  "Not reporting them.\n",results->standalone_digit);
   783     }
   784     /*
   785      * If more than 20 lines contain hyphens at end,
   786      * don't bother reporting them.
   787      */
   788     warnings.hyphen=1;
   789     if (results->hyphens>20)
   790     {
   791         warnings.hyphen=0;
   792         printf("   --> %ld lines in this file have hyphens at end. "
   793 	  "Not reporting them.\n",results->hyphens);
   794     }
   795     if (results->htmcount>20 && !pswit[MARKUP_SWITCH])
   796     {
   797         printf("   --> Looks like this is HTML. Switching HTML mode ON.\n");
   798         pswit[MARKUP_SWITCH]=1;
   799     }
   800     if (results->verylongline>0)
   801         printf("   --> %ld lines in this file are VERY long!\n",
   802 	  results->verylongline);
   803     /*
   804      * If there are more non-PG spaced dashes than PG em-dashes,
   805      * assume it's deliberate.
   806      * Current PG guidelines say don't use them, but older texts do,
   807      * and some people insist on them whatever the guidelines say.
   808      */
   809     warnings.dash=1;
   810     if (results->spacedash+results->non_PG_space_emdash>
   811       results->PG_space_emdash)
   812     {
   813         warnings.dash=0;
   814         printf("   --> There are %ld spaced dashes and em-dashes. "
   815 	  "Not reporting them.\n",
   816 	  results->spacedash+results->non_PG_space_emdash);
   817     }
   818     /* If more than a quarter of characters are hi-bit, bug out. */
   819     warnings.bin=1;
   820     if (results->binlen*4>results->totlen)
   821     {
   822         printf("   --> This file does not appear to be ASCII. "
   823 	  "Terminating. Best of luck with it!\n");
   824         exit(1);
   825     }
   826     if (results->alphalen*4<results->totlen)
   827     {
   828         printf("   --> This file does not appear to be text. "
   829 	  "Terminating. Best of luck with it!\n");
   830         exit(1);
   831     }
   832     if (results->binlen*100>results->totlen || results->binlen>100)
   833     {
   834         printf("   --> There are a lot of foreign letters here. "
   835 	  "Not reporting them.\n");
   836         warnings.bin=0;
   837     }
   838     warnings.isDutch=0;
   839     if (results->Dutchcount>50)
   840     {
   841         warnings.isDutch=1;
   842         printf("   --> This looks like Dutch - "
   843 	  "switching off dashes and warnings for 's Middags case.\n");
   844     }
   845     warnings.isFrench=0;
   846     if (results->Frenchcount>50)
   847     {
   848         warnings.isFrench=1;
   849         printf("   --> This looks like French - "
   850 	  "switching off some doublepunct.\n");
   851     }
   852     if (results->firstline && results->footerline)
   853         printf("    The PG header and footer appear to be already on.\n");
   854     else
   855     {
   856         if (results->firstline)
   857             printf("    The PG header is on - no footer.\n");
   858         if (results->footerline)
   859             printf("    The PG footer is on - no header.\n");
   860     }
   861     printf("\n");
   862     if (pswit[VERBOSE_SWITCH])
   863     {
   864         warnings.bin=1;
   865         warnings.shortline=1;
   866         warnings.dotcomma=1;
   867         warnings.longline=1;
   868         warnings.dash=1;
   869         warnings.digit=1;
   870         warnings.ast=1;
   871         warnings.fslash=1;
   872         warnings.hyphen=1;
   873         warnings.endquote=1;
   874         printf("   *** Verbose output is ON -- you asked for it! ***\n");
   875     }
   876     if (warnings.isDutch)
   877         warnings.dash=0;
   878     if (results->footerline>0 && results->firstline>0 &&
   879       results->footerline>results->firstline &&
   880       results->footerline-results->firstline<100)
   881     {
   882         printf("   --> I don't really know where this text starts. \n");
   883         printf("       There are no reference points.\n");
   884         printf("       I'm going to have to report the header and footer "
   885 	  "as well.\n");
   886         results->firstline=0;
   887     }
   888     return &warnings;
   889 }
   890 
   891 struct counters {
   892     long quot;
   893     signed int c_unders,c_brack,s_brack,r_brack;
   894     signed int open_single_quote,close_single_quote;
   895 };
   896 
   897 /*
   898  * analyse_quotes:
   899  *
   900  * Look along the line, accumulate the count of quotes, and see
   901  * if this is an empty line - i.e. a line with nothing on it
   902  * but spaces.
   903  * If line has just spaces, period, * and/or - on it, don't
   904  * count it, since empty lines with asterisks or dashes to
   905  * separate sections are common.
   906  *
   907  * Returns: Non-zero if the line is empty.
   908  */
   909 int analyse_quotes(const char *s,struct counters *counters)
   910 {
   911     signed int guessquote=0;
   912     int isemptyline=1;    /* assume the line is empty until proven otherwise */
   913     while (*s)
   914     {
   915 	if (*s==CHAR_DQUOTE)
   916 	    counters->quot++;
   917 	if (*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE)
   918 	{
   919 	    if (s==aline)
   920 	    {
   921 		/*
   922 		 * At start of line, it can only be an openquote.
   923 		 * Hardcode a very common exception!
   924 		 */
   925 		if (strncmp(s+2,"tis",3) && strncmp(s+2,"Tis",3))
   926 		    counters->open_single_quote++;
   927 	    }
   928 	    else if (gcisalpha(s[-1]) && gcisalpha(s[1]))
   929 		/* Do nothing! it's definitely an apostrophe, not a quote */
   930 		;
   931 	    /* it's outside a word - let's check it out */
   932 	    else if (*s==CHAR_OPEN_SQUOTE || gcisalpha(s[1]))
   933 	    {
   934 		/* it damwell better BE an openquote */
   935 		if (strncmp(s+1,"tis",3) && strncmp(s+1,"Tis",3))
   936 		    /* hardcode a very common exception! */
   937 		    counters->open_single_quote++;
   938 	    }
   939 	    else
   940 	    {
   941 		/* now - is it a closequote? */
   942 		guessquote=0;   /* accumulate clues */
   943 		if (gcisalpha(s[-1]))
   944 		{
   945 		    /* it follows a letter - could be either */
   946 		    guessquote++;
   947 		    if (s[-1]=='s')
   948 		    {
   949 			/* looks like a plural apostrophe */
   950 			guessquote-=3;
   951 			if (s[1]==CHAR_SPACE)  /* bonus marks! */
   952 			    guessquote-=2;
   953 		    }
   954 		}
   955 		/* it doesn't have a letter either side */
   956 		else if (strchr(".?!,;:",s[-1]) && strchr(".?!,;: ",s[1]))
   957 		    guessquote+=8; /* looks like a closequote */
   958 		else
   959 		    guessquote++;
   960 		if (counters->open_single_quote>counters->close_single_quote)
   961 		    /*
   962 		     * Give it the benefit of some doubt,
   963 		     * if a squote is already open.
   964 		     */
   965 		    guessquote++;
   966 		else
   967 		    guessquote--;
   968 		if (guessquote>=0)
   969 		    counters->close_single_quote++;
   970 	    }
   971 	}
   972 	if (*s!=CHAR_SPACE && *s!='-' && *s!='.' && *s!=CHAR_ASTERISK &&
   973 	  *s!=13 && *s!=10)
   974 	    isemptyline=0;  /* ignore lines like  *  *  *  as spacers */
   975 	if (*s==CHAR_UNDERSCORE)
   976 	    counters->c_unders++;
   977 	if (*s==CHAR_OPEN_CBRACK)
   978 	    counters->c_brack++;
   979 	if (*s==CHAR_CLOSE_CBRACK)
   980 	    counters->c_brack--;
   981 	if (*s==CHAR_OPEN_RBRACK)
   982 	    counters->r_brack++;
   983 	if (*s==CHAR_CLOSE_RBRACK)
   984 	    counters->r_brack--;
   985 	if (*s==CHAR_OPEN_SBRACK)
   986 	    counters->s_brack++;
   987 	if (*s==CHAR_CLOSE_SBRACK)
   988 	    counters->s_brack--;
   989 	s++;
   990     }
   991     return isemptyline;
   992 }
   993 
   994 /*
   995  * check_for_odd_characters:
   996  *
   997  * Check for binary and other odd characters.
   998  */
   999 void check_for_odd_characters(const char *aline,const struct warnings *warnings,
  1000   int isemptyline)
  1001 {
  1002     /* Don't repeat multiple warnings on one line. */
  1003     signed int eNon_A=0,eTab=0,eTilde=0,eCarat=0,eFSlash=0,eAst=0;
  1004     const char *s;
  1005     unsigned char c;
  1006     for (s=aline;*s;s++)
  1007     {
  1008 	c=*(unsigned char *)s;
  1009 	if (!eNon_A && (*s<CHAR_SPACE && *s!=9 && *s!='\n' || c>127))
  1010 	{
  1011 	    if (pswit[ECHO_SWITCH])
  1012 		printf("\n%s\n",aline);
  1013 	    if (!pswit[OVERVIEW_SWITCH])
  1014 		if (c>127 && c<160)
  1015 		    printf("    Line %ld column %d - "
  1016 		      "Non-ISO-8859 character %d\n",linecnt,(int)(s-aline)+1,c);
  1017 		else
  1018 		    printf("    Line %ld column %d - Non-ASCII character %d\n",
  1019 		      linecnt,(int)(s-aline)+1,c);
  1020 	    else
  1021 		cnt_bin++;
  1022 	    eNon_A=1;
  1023 	}
  1024 	if (!eTab && *s==CHAR_TAB)
  1025 	{
  1026 	    if (pswit[ECHO_SWITCH])
  1027 		printf("\n%s\n",aline);
  1028 	    if (!pswit[OVERVIEW_SWITCH])
  1029 		printf("    Line %ld column %d - Tab character?\n",
  1030 		  linecnt,(int)(s-aline)+1);
  1031 	    else
  1032 		cnt_odd++;
  1033 	    eTab=1;
  1034 	}
  1035 	if (!eTilde && *s==CHAR_TILDE)
  1036 	{
  1037 	    /*
  1038 	     * Often used by OCR software to indicate an
  1039 	     * unrecognizable character.
  1040 	     */
  1041 	    if (pswit[ECHO_SWITCH])
  1042 		printf("\n%s\n",aline);
  1043 	    if (!pswit[OVERVIEW_SWITCH])
  1044 		printf("    Line %ld column %d - Tilde character?\n",
  1045 		  linecnt,(int)(s-aline)+1);
  1046 	    else
  1047 		cnt_odd++;
  1048 	    eTilde=1;
  1049 	}
  1050 	if (!eCarat && *s==CHAR_CARAT)
  1051 	{  
  1052 	    if (pswit[ECHO_SWITCH])
  1053 		printf("\n%s\n",aline);
  1054 	    if (!pswit[OVERVIEW_SWITCH])
  1055 		printf("    Line %ld column %d - Carat character?\n",
  1056 		  linecnt,(int)(s-aline)+1);
  1057 	    else
  1058 		cnt_odd++;
  1059 	    eCarat=1;
  1060 	}
  1061 	if (!eFSlash && *s==CHAR_FORESLASH && warnings->fslash)
  1062 	{  
  1063 	    if (pswit[ECHO_SWITCH])
  1064 		printf("\n%s\n",aline);
  1065 	    if (!pswit[OVERVIEW_SWITCH])
  1066 		printf("    Line %ld column %d - Forward slash?\n",
  1067 		  linecnt,(int)(s-aline)+1);
  1068 	    else
  1069 		cnt_odd++;
  1070 	    eFSlash=1;
  1071 	}
  1072 	/*
  1073 	 * Report asterisks only in paranoid mode,
  1074 	 * since they're often deliberate.
  1075 	 */
  1076 	if (!eAst && pswit[PARANOID_SWITCH] && warnings->ast && !isemptyline &&
  1077 	  *s==CHAR_ASTERISK)
  1078 	{
  1079 	    if (pswit[ECHO_SWITCH])
  1080 		printf("\n%s\n",aline);
  1081 	    if (!pswit[OVERVIEW_SWITCH])
  1082 		printf("    Line %ld column %d - Asterisk?\n",
  1083 		  linecnt,(int)(s-aline)+1);
  1084 	    else
  1085 		cnt_odd++;
  1086 	    eAst=1;
  1087 	}
  1088     }
  1089 }
  1090 
  1091 /*
  1092  * check_for_long_line:
  1093  *
  1094  * Check for line too long.
  1095  */
  1096 void check_for_long_line(const char *aline)
  1097 {
  1098     if (strlen(aline)>LONGEST_PG_LINE)
  1099     {
  1100 	if (pswit[ECHO_SWITCH])
  1101 	    printf("\n%s\n",aline);
  1102 	if (!pswit[OVERVIEW_SWITCH])
  1103 	    printf("    Line %ld column %d - Long line %d\n",
  1104 	      linecnt,strlen(aline),strlen(aline));
  1105 	else
  1106 	    cnt_long++;
  1107     }
  1108 }
  1109 
  1110 struct line_properties {
  1111     unsigned int len,blen;
  1112     char start;
  1113 };
  1114 
  1115 /*
  1116  * check_for_short_line:
  1117  *
  1118  * Check for line too short.
  1119  *
  1120  * This one is a bit trickier to implement: we don't want to
  1121  * flag the last line of a paragraph for being short, so we
  1122  * have to wait until we know that our current line is a
  1123  * "normal" line, then report the _previous_ line if it was too
  1124  * short. We also don't want to report indented lines like
  1125  * chapter heads or formatted quotations. We therefore keep
  1126  * last->len as the length of the last line examined, and
  1127  * last->blen as the length of the last but one, and try to
  1128  * suppress unnecessary warnings by checking that both were of
  1129  * "normal" length. We keep the first character of the last
  1130  * line in last->start, and if it was a space, we assume that
  1131  * the formatting is deliberate. I can't figure out a way to
  1132  * distinguish something like a quoted verse left-aligned or
  1133  * the header or footer of a letter from a paragraph of short
  1134  * lines - maybe if I examined the whole paragraph, and if the
  1135  * para has less than, say, 8 lines and if all lines are short,
  1136  * then just assume it's OK? Need to look at some texts to see
  1137  * how often a formula like this would get the right result.
  1138  */
  1139 void check_for_short_line(const char *aline,const struct line_properties *last)
  1140 {
  1141     if (strlen(aline)>1 && last->len>1 && last->len<SHORTEST_PG_LINE &&
  1142       last->blen>1 && last->blen>SHORTEST_PG_LINE && last->start!=CHAR_SPACE)
  1143     {
  1144 	if (pswit[ECHO_SWITCH])
  1145 	    printf("\n%s\n",prevline);
  1146 	if (!pswit[OVERVIEW_SWITCH])
  1147 	    printf("    Line %ld column %d - Short line %d?\n",
  1148 	      linecnt-1,strlen(prevline),strlen(prevline));
  1149 	else
  1150 	    cnt_short++;
  1151     }
  1152 }
  1153 
  1154 /*
  1155  * check_for_starting_punctuation:
  1156  *
  1157  * Look for punctuation other than full ellipses at start of line.
  1158  */
  1159 void check_for_starting_punctuation(const char *aline)
  1160 {
  1161     if (*aline && strchr(".?!,;:",aline[0]) && strncmp(". . .",aline,5))
  1162     {
  1163 	if (pswit[ECHO_SWITCH])
  1164 	    printf("\n%s\n",aline);
  1165 	if (!pswit[OVERVIEW_SWITCH])
  1166 	    printf("    Line %ld column 1 - Begins with punctuation?\n",
  1167 	      linecnt);
  1168 	else
  1169 	    cnt_punct++;
  1170     }
  1171 }
  1172 
  1173 /*
  1174  * check_for_spaced_emdash:
  1175  *
  1176  * Check for spaced em-dashes.
  1177  *
  1178  * We must check _all_ occurrences of "--" on the line
  1179  * hence the loop - even if the first double-dash is OK
  1180  * there may be another that's wrong later on.
  1181  */
  1182 void check_for_spaced_emdash(const char *aline)
  1183 {
  1184     const char *s,*t;
  1185     s=aline;
  1186     while ((t=strstr(s,"--")))
  1187     {
  1188 	if (t>aline && t[-1]==CHAR_SPACE || t[2]==CHAR_SPACE)
  1189 	{
  1190 	    if (pswit[ECHO_SWITCH])
  1191 		printf("\n%s\n",aline);
  1192 	    if (!pswit[OVERVIEW_SWITCH])
  1193 		printf("    Line %ld column %d - Spaced em-dash?\n",
  1194 		  linecnt,(int)(t-aline)+1);
  1195 	    else
  1196 		cnt_dash++;
  1197 	}
  1198 	s=t+2;
  1199     }
  1200 }
  1201 
  1202 /*
  1203  * check_for_spaced_dash:
  1204  *
  1205  * Check for spaced dashes.
  1206  */
  1207 void check_for_spaced_dash(const char *aline)
  1208 {
  1209     const char *s;
  1210     if ((s=strstr(aline," -")))
  1211     {
  1212 	if (s[2]!='-')
  1213 	{
  1214 	    if (pswit[ECHO_SWITCH])
  1215 		printf("\n%s\n",aline);
  1216 	    if (!pswit[OVERVIEW_SWITCH])
  1217 		printf("    Line %ld column %d - Spaced dash?\n",
  1218 		  linecnt,(int)(s-aline)+1);
  1219 	    else
  1220 		cnt_dash++;
  1221 	}
  1222     }
  1223     else if ((s=strstr(aline,"- ")))
  1224     {
  1225 	if (s==aline || s[-1]!='-')
  1226 	{
  1227 	    if (pswit[ECHO_SWITCH])
  1228 		printf("\n%s\n",aline);
  1229 	    if (!pswit[OVERVIEW_SWITCH])
  1230 		printf("    Line %ld column %d - Spaced dash?\n",
  1231 		  linecnt,(int)(s-aline)+1);
  1232 	    else
  1233 		cnt_dash++;
  1234 	}
  1235     }
  1236 }
  1237 
  1238 /*
  1239  * check_for_unmarked_paragraphs:
  1240  *
  1241  * Check for unmarked paragraphs indicated by separate speakers.
  1242  *
  1243  * May well be false positive:
  1244  * "Bravo!" "Wonderful!" called the crowd.
  1245  * but useful all the same.
  1246  */
  1247 void check_for_unmarked_paragraphs(const char *aline)
  1248 {
  1249     const char *s;
  1250     s=strstr(aline,"\"  \"");
  1251     if (!s)
  1252 	s=strstr(aline,"\" \"");
  1253     if (s)
  1254     {
  1255 	if (pswit[ECHO_SWITCH])
  1256 	    printf("\n%s\n",aline);
  1257 	if (!pswit[OVERVIEW_SWITCH])
  1258 	    printf("    Line %ld column %d - Query missing paragraph break?\n",
  1259 	      linecnt,(int)(s-aline)+1);
  1260 	else
  1261 	    cnt_punct++;
  1262     }
  1263 }
  1264 
  1265 /*
  1266  * check_for_jeebies:
  1267  *
  1268  * Check for "to he" and other easy h/b errors.
  1269  *
  1270  * This is a very inadequate effort on the h/b problem,
  1271  * but the phrase "to he" is always an error, whereas "to
  1272  * be" is quite common.
  1273  * Similarly, '"Quiet!", be said.' is a non-be error
  1274  * "to he" is _not_ always an error!:
  1275  *       "Where they went to he couldn't say."
  1276  * Another false positive:
  1277  *       What would "Cinderella" be without the . . .
  1278  * and another: "If he wants to he can see for himself."
  1279  */
  1280 void check_for_jeebies(const char *aline)
  1281 {
  1282     const char *s;
  1283     s=strstr(aline," be could ");
  1284     if (!s)
  1285 	s=strstr(aline," be would ");
  1286     if (!s)
  1287 	s=strstr(aline," was be ");
  1288     if (!s)
  1289 	s=strstr(aline," be is ");
  1290     if (!s)
  1291 	s=strstr(aline," is be ");
  1292     if (!s)
  1293 	s=strstr(aline,"\", be ");
  1294     if (!s)
  1295 	s=strstr(aline,"\" be ");
  1296     if (!s)
  1297 	s=strstr(aline,"\" be ");
  1298     if (!s)
  1299 	s=strstr(aline," to he ");
  1300     if (s)
  1301     {
  1302 	if (pswit[ECHO_SWITCH])
  1303 	    printf("\n%s\n",aline);
  1304 	if (!pswit[OVERVIEW_SWITCH])
  1305 	    printf("    Line %ld column %d - Query he/be error?\n",
  1306 	      linecnt,(int)(s-aline)+1);
  1307 	else
  1308 	    cnt_word++;
  1309     }
  1310     s=strstr(aline," the had ");
  1311     if (!s)
  1312 	s=strstr(aline," a had ");
  1313     if (!s)
  1314 	s=strstr(aline," they bad ");
  1315     if (!s)
  1316 	s=strstr(aline," she bad ");
  1317     if (!s)
  1318 	s=strstr(aline," he bad ");
  1319     if (!s)
  1320 	s=strstr(aline," you bad ");
  1321     if (!s)
  1322 	s=strstr(aline," i bad ");
  1323     if (s)
  1324     {
  1325 	if (pswit[ECHO_SWITCH])
  1326 	    printf("\n%s\n",aline);
  1327 	if (!pswit[OVERVIEW_SWITCH])
  1328 	    printf("    Line %ld column %d - Query had/bad error?\n",
  1329 	      linecnt,(int)(s-aline)+1);
  1330 	else
  1331 	    cnt_word++;
  1332     }
  1333     s=strstr(aline,"; hut ");
  1334     if (!s)
  1335 	s=strstr(aline,", hut ");
  1336     if (s)
  1337     {
  1338 	if (pswit[ECHO_SWITCH])
  1339 	    printf("\n%s\n",aline);
  1340 	if (!pswit[OVERVIEW_SWITCH])
  1341 	    printf("    Line %ld column %d - Query hut/but error?\n",
  1342 	      linecnt,(int)(s-aline)+1);
  1343 	else
  1344 	    cnt_word++;
  1345     }
  1346 }
  1347 
  1348 /*
  1349  * check_for_mta_from:
  1350  *
  1351  * Special case - angled bracket in front of "From" placed there by an
  1352  * MTA when sending an e-mail.
  1353  */
  1354 void check_for_mta_from(const char *aline)
  1355 {
  1356     const char *s;
  1357     s=strstr(aline,">From");
  1358     if (s)
  1359     {
  1360 	if (pswit[ECHO_SWITCH])
  1361 	    printf("\n%s\n",aline);
  1362 	if (!pswit[OVERVIEW_SWITCH])
  1363 	    printf("    Line %ld column %d - Query angled bracket with From\n",
  1364 	      linecnt,(int)(s-aline)+1);
  1365 	else
  1366 	    cnt_punct++;
  1367     }
  1368 }
  1369 
  1370 /*
  1371  * check_for_orphan_character:
  1372  *
  1373  * Check for a single character line -
  1374  * often an overflow from bad wrapping.
  1375  */
  1376 void check_for_orphan_character(const char *aline)
  1377 {
  1378     if (*aline && !aline[1])
  1379     {
  1380 	if (*aline=='I' || *aline=='V' || *aline=='X' || *aline=='L' ||
  1381 	  gcisdigit(*aline))
  1382 	    ; /* Nothing - ignore numerals alone on a line. */
  1383 	else
  1384 	{
  1385 	    if (pswit[ECHO_SWITCH])
  1386 		printf("\n%s\n",aline);
  1387 	    if (!pswit[OVERVIEW_SWITCH])
  1388 		printf("    Line %ld column 1 - Query single character line\n",
  1389 		  linecnt);
  1390 	    else
  1391 		cnt_punct++;
  1392 	}
  1393     }
  1394 }
  1395 
  1396 /*
  1397  * check_for_pling_scanno:
  1398  *
  1399  * Check for I" - often should be !
  1400  */
  1401 void check_for_pling_scanno(const char *aline)
  1402 {
  1403     const char *s;
  1404     s=strstr(aline," I\"");
  1405     if (s)
  1406     {
  1407 	if (pswit[ECHO_SWITCH])
  1408 	    printf("\n%s\n",aline);
  1409 	if (!pswit[OVERVIEW_SWITCH])
  1410 	    printf("    Line %ld column %ld - Query I=exclamation mark?\n",
  1411 	      linecnt,s-aline);
  1412 	else
  1413 	    cnt_punct++;
  1414     }
  1415 }
  1416 
  1417 /*
  1418  * check_for_extra_period:
  1419  *
  1420  * Check for period without a capital letter. Cut-down from gutspell.
  1421  * Only works when it happens on a single line.
  1422  */
  1423 void check_for_extra_period(const char *aline,const struct warnings *warnings)
  1424 {
  1425     const char *s,*t,*s1;
  1426     signed int i,istypo,isdup;
  1427     static char qperiod[MAX_QWORD][MAX_QWORD_LENGTH];
  1428     static int qperiod_index=0;
  1429     char testword[MAXWORDLEN]="";
  1430     if (pswit[PARANOID_SWITCH])
  1431     {
  1432 	for (t=s=aline;strstr(t,". ");)
  1433 	{
  1434 	    t=strstr(t,". ");
  1435 	    if (t==s)
  1436 	    {
  1437 		t++;
  1438 		/* start of line punctuation is handled elsewhere */
  1439 		continue;
  1440 	    }
  1441 	    if (!gcisalpha(t[-1]))
  1442 	    {
  1443 		t++;
  1444 		continue;
  1445 	    }
  1446 	    if (warnings->isDutch)
  1447 	    {
  1448 		/* For Frank & Jeroen -- 's Middags case */
  1449 		if (t[2]==CHAR_SQUOTE && t[3]>='a' && t[3]<='z' &&
  1450 		  t[4]==CHAR_SPACE && t[5]>='A' && t[5]<='Z')
  1451 		{
  1452 		    t++;
  1453 		    continue;
  1454 		}
  1455 	    }
  1456 	    s1=t+2;
  1457 	    while (*s1 && !gcisalpha(*s1) && !isdigit(*s1))
  1458 		s1++;
  1459 	    if (*s1>='a' && *s1<='z')
  1460 	    {
  1461 		/* we have something to investigate */
  1462 		istypo=1;
  1463 		/* so let's go back and find out */
  1464 		for (s1=t-1;s1>=s &&
  1465 		  (gcisalpha(*s1) || gcisdigit(*s1) || *s1==CHAR_SQUOTE &&
  1466 		  gcisalpha(s1[1]) && gcisalpha(s1[-1]));s1--)
  1467 		    ;
  1468 		s1++;
  1469 		for (i=0;*s1 && *s1!='.';s1++,i++)
  1470 		    testword[i]=*s1;
  1471 		testword[i]=0;
  1472 		for (i=0;*abbrev[i];i++)
  1473 		    if (!strcmp(testword,abbrev[i]))
  1474 			istypo=0;
  1475 		if (gcisdigit(*testword))
  1476 		    istypo=0;
  1477 		if (!testword[1])
  1478 		    istypo=0;
  1479 		if (isroman(testword))
  1480 		    istypo=0;
  1481 		if (istypo)
  1482 		{
  1483 		    istypo=0;
  1484 		    for (i=0;testword[i];i++)
  1485 			if (strchr(vowels,testword[i]))
  1486 			    istypo=1;
  1487 		}
  1488 		if (istypo)
  1489 		{
  1490 		    isdup=0;
  1491 		    if (strlen(testword)<MAX_QWORD_LENGTH &&
  1492 		      !pswit[VERBOSE_SWITCH])
  1493 			for (i=0;i<qperiod_index;i++)
  1494 			    if (!strcmp(testword,qperiod[i]))
  1495 				isdup=1;
  1496 		    if (!isdup)
  1497 		    {
  1498 			if (qperiod_index<MAX_QWORD &&
  1499 			  strlen(testword)<MAX_QWORD_LENGTH)
  1500 			{
  1501 			    strcpy(qperiod[qperiod_index],testword);
  1502 			    qperiod_index++;
  1503 			}
  1504 			if (pswit[ECHO_SWITCH])
  1505 			    printf("\n%s\n",aline);
  1506 			if (!pswit[OVERVIEW_SWITCH])
  1507 			    printf("    Line %ld column %d - Extra period?\n",
  1508 			      linecnt,(int)(t-aline)+1);
  1509 			else
  1510 			    cnt_punct++;
  1511 		    }
  1512 		}
  1513 	    }
  1514 	    t++;
  1515 	}
  1516     }
  1517 }
  1518 
  1519 /*
  1520  * check_for_following_punctuation:
  1521  *
  1522  * Check for words usually not followed by punctuation.
  1523  */
  1524 void check_for_following_punctuation(const char *aline)
  1525 {
  1526     int i;
  1527     const char *s,*wordstart;
  1528     char inword[MAXWORDLEN];
  1529     if (pswit[TYPO_SWITCH])
  1530     {
  1531 	for (s=aline;*s;)
  1532 	{
  1533 	    wordstart=s;
  1534 	    s=getaword(s,inword);
  1535 	    if (!*inword)
  1536 		continue;
  1537 	    lowerit(inword);
  1538 	    for (i=0;*nocomma[i];i++)
  1539 		if (!strcmp(inword,nocomma[i]))
  1540 		{
  1541 		    if (*s==',' || *s==';' || *s==':')
  1542 		    {
  1543 			if (pswit[ECHO_SWITCH])
  1544 			    printf("\n%s\n",aline);
  1545 			if (!pswit[OVERVIEW_SWITCH])
  1546 			    printf("    Line %ld column %d - "
  1547 			      "Query punctuation after %s?\n",
  1548 			      linecnt,(int)(s-aline)+1,inword);
  1549 			else
  1550 			    cnt_punct++;
  1551 		    }
  1552 		}
  1553 	    for (i=0;*noperiod[i];i++)
  1554 		if (!strcmp(inword,noperiod[i]))
  1555 		{
  1556 		    if (*s=='.' || *s=='!')
  1557 		    {
  1558 			if (pswit[ECHO_SWITCH])
  1559 			    printf("\n%s\n",aline);
  1560 			if (!pswit[OVERVIEW_SWITCH])
  1561 			    printf("    Line %ld column %d - "
  1562 			      "Query punctuation after %s?\n",
  1563 			      linecnt,(int)(s-aline)+1,inword);
  1564 			else
  1565 			    cnt_punct++;
  1566 		    }
  1567 		}
  1568 	}
  1569     }
  1570 }
  1571 
  1572 /*
  1573  * procfile:
  1574  *
  1575  * Process one file.
  1576  */
  1577 void procfile(char *filename)
  1578 {
  1579     const char *s,*t,*wordstart;
  1580     char inword[MAXWORDLEN],testword[MAXWORDLEN];
  1581     char parastart[81];     /* first line of current para */
  1582     FILE *infile;
  1583     struct first_pass_results *first_pass_results;
  1584     struct warnings *warnings;
  1585     struct counters counters={0};
  1586     struct line_properties last={0};
  1587     int isemptyline;
  1588     long squot,start_para_line;
  1589     signed int i,llen,isacro,isellipsis,istypo,alower;
  1590     signed int dquotepar,squotepar;
  1591     signed int isnewpara,vowel,consonant;
  1592     char dquote_err[80],squote_err[80],rbrack_err[80],sbrack_err[80],
  1593       cbrack_err[80],unders_err[80];
  1594     signed int qword_index,isdup;
  1595     signed int enddash;
  1596     last.start=CHAR_SPACE;
  1597     *dquote_err=*squote_err=*rbrack_err=*cbrack_err=*sbrack_err=
  1598       *unders_err=*prevline=0;
  1599     linecnt=checked_linecnt=start_para_line=0;
  1600     squot=0;
  1601     i=llen=isacro=isellipsis=0;
  1602     isnewpara=vowel=consonant=enddash=0;
  1603     qword_index=0;
  1604     *inword=*testword=0;
  1605     dquotepar=squotepar=0;
  1606     infile=fopen(filename,"rb");
  1607     if (!infile)
  1608     {
  1609         if (pswit[STDOUT_SWITCH])
  1610             fprintf(stdout,"bookloupe: cannot open %s\n",filename);
  1611         else
  1612             fprintf(stderr,"bookloupe: cannot open %s\n",filename);
  1613 	exit(1);
  1614     }
  1615     fprintf(stdout,"\n\nFile: %s\n\n",filename);
  1616     first_pass_results=first_pass(infile);
  1617     warnings=report_first_pass(first_pass_results);
  1618     rewind(infile);
  1619     /*
  1620      * Here we go with the main pass. Hold onto yer hat!
  1621      * Re-init some variables we've dirtied.
  1622      */
  1623     squot=linecnt=0;
  1624     while (flgets(aline,LINEBUFSIZE-1,infile,linecnt+1))
  1625     {
  1626         linecnt++;
  1627         if (linecnt==1)
  1628 	    isnewpara=1;
  1629         if (pswit[DP_SWITCH] && !strncmp(aline,"-----File: ",11))
  1630 	    continue;    // skip DP page separators completely
  1631         if (linecnt<first_pass_results->firstline ||
  1632 	  (first_pass_results->footerline>0 &&
  1633 	  linecnt>first_pass_results->footerline))
  1634 	{
  1635             if (pswit[HEADER_SWITCH])
  1636 	    {
  1637                 if (!strncmp(aline,"Title:",6))
  1638                     printf("    %s\n",aline);
  1639                 if (!strncmp(aline,"Author:",7))
  1640                     printf("    %s\n",aline);
  1641                 if (!strncmp(aline,"Release Date:",13))
  1642                     printf("    %s\n",aline);
  1643                 if (!strncmp(aline,"Edition:",8))
  1644                     printf("    %s\n\n",aline);
  1645 	    }
  1646             continue;                /* skip through the header */
  1647 	}
  1648         checked_linecnt++;
  1649         s=aline;
  1650         /*
  1651 	 * If we are in a state of unbalanced quotes, and this line
  1652          * doesn't begin with a quote, output the stored error message.
  1653          * If the -P switch was used, print the warning even if the
  1654          * new para starts with quotes.
  1655 	 */
  1656         t=s;
  1657         while (*t==' ')
  1658 	    t++;
  1659         if (*dquote_err)
  1660             if (*t!=CHAR_DQUOTE || pswit[QPARA_SWITCH])
  1661 	    {
  1662                 if (!pswit[OVERVIEW_SWITCH])
  1663 		{
  1664                     if (pswit[ECHO_SWITCH])
  1665 			printf("\n%s\n",parastart);
  1666                     printf(dquote_err);
  1667 		}
  1668                 else
  1669                     cnt_dquot++;
  1670             }
  1671         if (*squote_err)
  1672 	{
  1673             if (*t!=CHAR_SQUOTE && *t!=CHAR_OPEN_SQUOTE ||
  1674 	      pswit[QPARA_SWITCH] || squot)
  1675 	    {
  1676                 if (!pswit[OVERVIEW_SWITCH])
  1677 		{
  1678                     if (pswit[ECHO_SWITCH])
  1679 			printf("\n%s\n",parastart);
  1680                     printf(squote_err);
  1681 		}
  1682                 else
  1683                     cnt_squot++;
  1684 	    }
  1685             squot=0;
  1686 	}
  1687         if (*rbrack_err)
  1688 	{
  1689             if (!pswit[OVERVIEW_SWITCH])
  1690 	    {
  1691                 if (pswit[ECHO_SWITCH])
  1692 		    printf("\n%s\n",parastart);
  1693                 printf(rbrack_err);
  1694 	    }
  1695             else
  1696                 cnt_brack++;
  1697 	}
  1698         if (*sbrack_err)
  1699 	{
  1700             if (!pswit[OVERVIEW_SWITCH])
  1701 	    {
  1702                 if (pswit[ECHO_SWITCH])
  1703 		    printf("\n%s\n",parastart);
  1704                 printf(sbrack_err);
  1705 	    }
  1706             else
  1707                 cnt_brack++;
  1708 	}
  1709         if (*cbrack_err)
  1710 	{
  1711             if (!pswit[OVERVIEW_SWITCH])
  1712 	    {
  1713                 if (pswit[ECHO_SWITCH])
  1714 		    printf("\n%s\n",parastart);
  1715                 printf(cbrack_err);
  1716 	    }
  1717             else
  1718                 cnt_brack++;
  1719 	}
  1720         if (*unders_err)
  1721 	{
  1722             if (!pswit[OVERVIEW_SWITCH])
  1723 	    {
  1724                 if (pswit[ECHO_SWITCH])
  1725 		    printf("\n%s\n",parastart);
  1726                 printf(unders_err);
  1727 	    }
  1728             else
  1729                 cnt_brack++;
  1730 	}
  1731         *dquote_err=*squote_err=*rbrack_err=*cbrack_err= 
  1732 	  *sbrack_err=*unders_err=0;
  1733 	isemptyline=analyse_quotes(aline,&counters);
  1734         if (isnewpara && !isemptyline)
  1735 	{
  1736 	    /* This line is the start of a new paragraph. */
  1737             start_para_line=linecnt;
  1738 	    /* Capture its first line in case we want to report it later. */
  1739             strncpy(parastart,aline,80);
  1740             parastart[79]=0;
  1741             dquotepar=squotepar=0; /* restart the quote count */
  1742             s=aline;
  1743             while (!gcisalpha(*s) && !gcisdigit(*s) && *s)
  1744 		s++;
  1745             if (*s>='a' && *s<='z')
  1746 	    {
  1747 		/* and its first letter is lowercase */
  1748                 if (pswit[ECHO_SWITCH])
  1749 		    printf("\n%s\n",aline);
  1750                 if (!pswit[OVERVIEW_SWITCH])
  1751                     printf("    Line %ld column %d - "
  1752 		      "Paragraph starts with lower-case\n",
  1753 		      linecnt,(int)(s-aline)+1);
  1754                 else
  1755                     cnt_punct++;
  1756 	    }
  1757             isnewpara=0; /* Signal the end of new para processing. */
  1758 	}
  1759         /* Check for an em-dash broken at line end. */
  1760         if (enddash && *aline=='-')
  1761 	{
  1762             if (pswit[ECHO_SWITCH])
  1763 		printf("\n%s\n",aline);
  1764             if (!pswit[OVERVIEW_SWITCH])
  1765                 printf("    Line %ld column 1 - Broken em-dash?\n",linecnt);
  1766             else
  1767                 cnt_punct++;
  1768 	}
  1769         enddash=0;
  1770         for (s=aline+strlen(aline)-1;*s==' ' && s>aline;s--)
  1771 	    ;
  1772         if (s>=aline && *s=='-')
  1773             enddash=1;
  1774 	/*
  1775          * Check for invalid or questionable characters in the line
  1776          * Anything above 127 is invalid for plain ASCII, and
  1777          * non-printable control characters should also be flagged.
  1778          * Tabs should generally not be there.
  1779 	 */
  1780         for (s=aline;*s;s++)
  1781 	{
  1782             i=(unsigned char)*s;
  1783             if (i<CHAR_SPACE && i!=CHAR_LF && i!=CHAR_CR && i!=CHAR_TAB)
  1784 	    {
  1785                 if (pswit[ECHO_SWITCH])
  1786 		    printf("\n%s\n",aline);
  1787                 if (!pswit[OVERVIEW_SWITCH])
  1788                     printf("    Line %ld column %d - Control character %d\n",
  1789 		      linecnt,(int)(s-aline)+1,i);
  1790                 else
  1791                     cnt_bin++;
  1792 	    }
  1793 	}
  1794         if (warnings->bin)
  1795 	    check_for_odd_characters(aline,warnings,isemptyline);
  1796         if (warnings->longline)
  1797 	    check_for_long_line(aline);
  1798         if (warnings->shortline)
  1799 	    check_for_short_line(aline,&last);
  1800         last.blen=last.len;
  1801         last.len=strlen(aline);
  1802         last.start=aline[0];
  1803 	check_for_starting_punctuation(aline);
  1804         if (warnings->dash)
  1805 	{
  1806 	    check_for_spaced_emdash(aline);
  1807 	    check_for_spaced_dash(aline);
  1808 	}
  1809 	check_for_unmarked_paragraphs(aline);
  1810 	check_for_jeebies(aline);
  1811 	check_for_mta_from(aline);
  1812 	check_for_orphan_character(aline);
  1813 	check_for_pling_scanno(aline);
  1814 	check_for_extra_period(aline,warnings);
  1815 	check_for_following_punctuation(aline);
  1816         /*
  1817 	 * Check for commonly mistyped words,
  1818 	 * and digits like 0 for O in a word.
  1819 	 */
  1820         for (s=aline;*s;)
  1821 	{
  1822             wordstart=s;
  1823             s=getaword(s,inword);
  1824             if (!*inword)
  1825 		continue; /* don't bother with empty lines */
  1826             if (mixdigit(inword))
  1827 	    {
  1828                 if (pswit[ECHO_SWITCH])
  1829 		    printf("\n%s\n",aline);
  1830                 if (!pswit[OVERVIEW_SWITCH])
  1831                     printf("    Line %ld column %d - Query digit in %s\n",
  1832 		      linecnt,(int)(wordstart-aline)+1,inword);
  1833                 else
  1834                     cnt_word++;
  1835 	    }
  1836             /*
  1837 	     * Put the word through a series of tests for likely typos and OCR
  1838 	     * errors.
  1839 	     */
  1840             if (pswit[TYPO_SWITCH])
  1841 	    {
  1842                 istypo=0;
  1843                 strcpy(testword,inword);
  1844                 alower=0;
  1845                 for (i=0;i<(signed int)strlen(testword);i++)
  1846 		{
  1847 		    /* lowercase for testing */
  1848                     if (testword[i]>='a' && testword[i]<='z')
  1849 			alower=1;
  1850                     if (alower && testword[i]>='A' && testword[i]<='Z')
  1851 		    {
  1852                         /*
  1853 			 * We have an uppercase mid-word. However, there are
  1854 			 * common cases:
  1855                          *   Mac and Mc like McGill
  1856                          *   French contractions like l'Abbe
  1857 			 */
  1858                         if (i==2 && testword[0]=='m' && testword[1]=='c' ||
  1859                           i==3 && testword[0]=='m' && testword[1]=='a' &&
  1860 			  testword[2]=='c' || i>0 && testword[i-1]==CHAR_SQUOTE)
  1861 			    ; /* do nothing! */
  1862                         else
  1863                             istypo=1;
  1864 		    }
  1865                     testword[i]=(char)tolower(testword[i]);
  1866 		}
  1867                 /*
  1868 		 * Check for certain unlikely two-letter combinations at word
  1869 		 * start and end.
  1870 		 */
  1871                 if (strlen(testword)>1)
  1872 		{
  1873                     for (i=0;*nostart[i];i++)
  1874                         if (!strncmp(testword,nostart[i],2))
  1875                             istypo=1;
  1876                     for (i=0;*noend[i];i++)
  1877                         if (!strncmp(testword+strlen(testword)-2,noend[i],2))
  1878                             istypo=1;
  1879 		}
  1880                 /* ght is common, gbt never. Like that. */
  1881                 if (strstr(testword,"cb"))
  1882 		    istypo=1;
  1883                 if (strstr(testword,"gbt"))
  1884 		    istypo=1;
  1885                 if (strstr(testword,"pbt"))
  1886 		    istypo=1;
  1887                 if (strstr(testword,"tbs"))
  1888 		    istypo=1;
  1889                 if (strstr(testword,"mrn"))
  1890 		    istypo=1;
  1891                 if (strstr(testword,"ahle"))
  1892 		    istypo=1;
  1893                 if (strstr(testword,"ihle"))
  1894 		    istypo=1;
  1895                 /*
  1896 		 * "TBE" does happen - like HEARTBEAT - but uncommon.
  1897                  * Also "TBI" - frostbite, outbid - but uncommon.
  1898                  * Similarly "ii" like Hawaii, or Pompeii, and in Roman
  1899 		 * numerals, but "ii" is a common scanno.
  1900 		 */
  1901                 if (strstr(testword,"tbi"))
  1902 		    istypo=1;
  1903                 if (strstr(testword,"tbe"))
  1904 		    istypo=1;
  1905                 if (strstr(testword,"ii"))
  1906 		    istypo=1;
  1907                 /*
  1908 		 * Check for no vowels or no consonants.
  1909                  * If none, flag a typo.
  1910 		 */
  1911                 if (!istypo && strlen(testword)>1)
  1912 		{
  1913                     vowel=consonant=0;
  1914                     for (i=0;testword[i];i++)
  1915 		    {
  1916                         if (testword[i]=='y' || gcisdigit(testword[i]))
  1917 			{
  1918 			    /* Yah, this is loose. */
  1919                             vowel++;
  1920                             consonant++;
  1921 			}
  1922                         else if (strchr(vowels,testword[i]))
  1923 			    vowel++;
  1924 			else
  1925 			    consonant++;
  1926 		    }
  1927                     if (!vowel || !consonant)
  1928                         istypo=1;
  1929 		}
  1930                 /*
  1931 		 * Now exclude the word from being reported if it's in
  1932                  * the okword list.
  1933 		 */
  1934                 for (i=0;*okword[i];i++)
  1935                     if (!strcmp(testword,okword[i]))
  1936                         istypo=0;
  1937                 /*
  1938 		 * What looks like a typo may be a Roman numeral.
  1939 		 * Exclude these.
  1940 		 */
  1941                 if (istypo && isroman(testword))
  1942 		    istypo=0;
  1943                 /* Check the manual list of typos. */
  1944                 if (!istypo)
  1945                     for (i=0;*typo[i];i++)
  1946                         if (!strcmp(testword,typo[i]))
  1947                             istypo=1;
  1948                 /*
  1949 		 * Check lowercase s, l, i and m - special cases.
  1950                  *   "j" - often a semi-colon gone wrong.
  1951                  *   "d" for a missing apostrophe - he d
  1952                  *   "n" for "in"
  1953 		 */
  1954                 if (!istypo && strlen(testword)==1 && strchr("slmijdn",*inword))
  1955 		    istypo=1;
  1956                 if (istypo)
  1957 		{
  1958                     isdup=0;
  1959                     if (strlen(testword)<MAX_QWORD_LENGTH &&
  1960 		      !pswit[VERBOSE_SWITCH])
  1961                         for (i=0;i<qword_index;i++)
  1962                             if (!strcmp(testword,qword[i]))
  1963 			    {
  1964                                 isdup=1;
  1965                                 ++dupcnt[i];
  1966 			    }
  1967                     if (!isdup)
  1968 		    {
  1969                         if (qword_index<MAX_QWORD &&
  1970 			  strlen(testword)<MAX_QWORD_LENGTH)
  1971 			{
  1972                             strcpy(qword[qword_index],testword);
  1973                             qword_index++;
  1974 			}
  1975                         if (pswit[ECHO_SWITCH])
  1976 			    printf("\n%s\n",aline);
  1977                         if (!pswit[OVERVIEW_SWITCH])
  1978 			{
  1979                             printf("    Line %ld column %d - Query word %s",
  1980 			      linecnt,(int)(wordstart-aline)+1,inword);
  1981                             if (strlen(testword)<MAX_QWORD_LENGTH &&
  1982 			      !pswit[VERBOSE_SWITCH])
  1983                                 printf(" - not reporting duplicates");
  1984                             printf("\n");
  1985 			}
  1986                         else
  1987                             cnt_word++;
  1988 		    }
  1989 		}
  1990 	    }
  1991 	    /* check the user's list of typos */
  1992 	    if (!istypo && usertypo_count)
  1993 		for (i=0;i<usertypo_count;i++)
  1994 		    if (!strcmp(testword,usertypo[i]))
  1995 		    {
  1996 			if (pswit[ECHO_SWITCH])
  1997 			    printf("\n%s\n",aline);
  1998 			if (!pswit[OVERVIEW_SWITCH])  
  1999 			    printf("    Line %ld column %d - "
  2000 			      "Query possible scanno %s\n",
  2001 			      linecnt,(int)(wordstart-aline)+2,inword);
  2002 		    }
  2003             if (pswit[PARANOID_SWITCH] && warnings->digit)
  2004 	    {
  2005 		/* In paranoid mode, query all 0 and 1 standing alone. */
  2006                 if (!strcmp(inword,"0") || !strcmp(inword,"1"))
  2007 		{
  2008                     if (pswit[ECHO_SWITCH])
  2009 			printf("\n%s\n",aline);
  2010                     if (!pswit[OVERVIEW_SWITCH])
  2011                         printf("    Line %ld column %d - Query standalone %s\n",
  2012 			  linecnt,(int)(wordstart-aline)+2,inword);
  2013                     else
  2014                         cnt_word++;
  2015 		}
  2016 	    }
  2017 	}
  2018 	/*
  2019          * Look for added or missing spaces around punctuation and quotes.
  2020          * If there is a punctuation character like ! with no space on
  2021          * either side, suspect a missing!space. If there are spaces on
  2022          * both sides , assume a typo. If we see a double quote with no
  2023          * space or punctuation on either side of it, assume unspaced
  2024          * quotes "like"this.
  2025 	 */
  2026         llen=strlen(aline);
  2027         for (i=1;i<llen;i++)
  2028 	{
  2029 	    /* For each character in the line after the first. */
  2030             if (strchr(".?!,;:_",aline[i]))  /* if it's punctuation */
  2031 	    {
  2032 		/* we need to suppress warnings for acronyms like M.D. */
  2033                 isacro=0;
  2034 		/* we need to suppress warnings for ellipsis . . . */
  2035                 isellipsis=0;
  2036 		/* if there are letters on both sides of it or ... */
  2037                 if (gcisalpha(aline[i-1]) && gcisalpha(aline[i+1]) ||
  2038                    gcisalpha(aline[i+1]) && strchr("?!,;:",aline[i]))
  2039 		{
  2040 		    /* ...if it's strict punctuation followed by an alpha */
  2041                     if (aline[i]=='.')
  2042 		    {
  2043                         if (i>2 && aline[i-2]=='.')
  2044 			    isacro=1;
  2045                         if (i+2<llen && aline[i+2]=='.')
  2046 			    isacro=1;
  2047 		    }
  2048                     if (!isacro)
  2049 		    {
  2050                         if (pswit[ECHO_SWITCH])
  2051 			    printf("\n%s\n",aline);
  2052                         if (!pswit[OVERVIEW_SWITCH])
  2053                             printf("    Line %ld column %d - Missing space?\n",
  2054 			      linecnt,i+1);
  2055                         else
  2056                             cnt_punct++;
  2057 		    }
  2058 		}
  2059                 if (aline[i-1]==CHAR_SPACE &&
  2060 		  (aline[i+1]==CHAR_SPACE || aline[i+1]==0))
  2061 		{
  2062 		    /*
  2063 		     * If there are spaces on both sides,
  2064 		     * or space before and end of line.
  2065 		     */
  2066                     if (aline[i]=='.')
  2067 		    {
  2068                         if (i>2 && aline[i-2]=='.')
  2069 			    isellipsis=1;
  2070                         if (i+2<llen && aline[i+2]=='.')
  2071 			    isellipsis=1;
  2072 		    }
  2073                     if (!isemptyline && !isellipsis)
  2074 		    {
  2075                         if (pswit[ECHO_SWITCH])
  2076 			    printf("\n%s\n",aline);
  2077                         if (!pswit[OVERVIEW_SWITCH])
  2078                             printf("    Line %ld column %d - "
  2079 			      "Spaced punctuation?\n",linecnt,i+1);
  2080                         else
  2081                             cnt_punct++;
  2082 		    }
  2083 		}
  2084 	    }
  2085 	}
  2086         /* Split out the characters that CANNOT be preceded by space. */
  2087         llen=strlen(aline);
  2088         for (i=1;i<llen;i++)
  2089 	{
  2090 	    /* for each character in the line after the first */
  2091             if (strchr("?!,;:",aline[i]))
  2092 	    {
  2093 		/* if it's punctuation that _cannot_ have a space before it */
  2094                 if (aline[i-1]==CHAR_SPACE && !isemptyline &&
  2095 		  aline[i+1]!=CHAR_SPACE)
  2096 		{
  2097 		    /*
  2098 		     * If aline[i+1) DOES == space,
  2099 		     * it was already reported just above.
  2100 		     */
  2101                     if (pswit[ECHO_SWITCH])
  2102 			printf("\n%s\n",aline);
  2103                     if (!pswit[OVERVIEW_SWITCH])
  2104                         printf("    Line %ld column %d - Spaced punctuation?\n",
  2105 			  linecnt,i+1);
  2106                     else
  2107                         cnt_punct++;
  2108 		}
  2109 	    }
  2110 	}
  2111         /*
  2112 	 * Special case " .X" where X is any alpha.
  2113          * This plugs a hole in the acronym code above.
  2114 	 * Inelegant, but maintainable.
  2115 	 */
  2116         llen=strlen(aline);
  2117         for (i=1;i<llen;i++)
  2118 	{
  2119 	    /* for each character in the line after the first */
  2120             if (aline[i]=='.')
  2121 	    {
  2122 		/* if it's a period */
  2123                 if (aline[i-1]==CHAR_SPACE && gcisalpha(aline[i+1]))
  2124 		{
  2125 		    /*
  2126 		     * If the period follows a space and
  2127 		     * is followed by a letter.
  2128 		     */
  2129                     if (pswit[ECHO_SWITCH])
  2130 			printf("\n%s\n",aline);
  2131                     if (!pswit[OVERVIEW_SWITCH])
  2132                         printf("    Line %ld column %d - Spaced punctuation?\n",
  2133 			  linecnt,i+1);
  2134                     else
  2135                         cnt_punct++;
  2136 		}
  2137 	    }
  2138 	}
  2139         for (i=1;i<llen;i++)
  2140 	{
  2141 	    /* for each character in the line after the first */
  2142             if (aline[i]==CHAR_DQUOTE)
  2143 	    {
  2144                 if (!strchr(" _-.'`,;:!/([{?}])",aline[i-1]) &&
  2145 		  !strchr(" _-.'`,;:!/([{?}])",aline[i+1]) && aline[i+1] ||
  2146 		  !strchr(" _-([{'`",aline[i-1]) && gcisalpha(aline[i+1]))
  2147 		{
  2148 		    if (pswit[ECHO_SWITCH])
  2149 			printf("\n%s\n",aline);
  2150 		    if (!pswit[OVERVIEW_SWITCH])
  2151 			printf("    Line %ld column %d - Unspaced quotes?\n",
  2152 			  linecnt,i+1);
  2153 		    else
  2154 			cnt_punct++;
  2155 		}
  2156 	    }
  2157 	}
  2158         /* Check parity of quotes. */
  2159         for (s=aline;*s;s++)
  2160 	{
  2161             if (*s==CHAR_DQUOTE)
  2162 	    {
  2163                 if (!(dquotepar=!dquotepar))
  2164 		{
  2165 		    /* parity even */
  2166                     if (!strchr("_-.'`/,;:!?)]} ",s[1]))
  2167 		    {
  2168                         if (pswit[ECHO_SWITCH])
  2169 			    printf("\n%s\n",aline);
  2170                         if (!pswit[OVERVIEW_SWITCH])
  2171                             printf("    Line %ld column %d - "
  2172 			      "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
  2173                         else
  2174                             cnt_punct++;
  2175 		    }
  2176 		}
  2177                 else
  2178 		{
  2179 		    /* parity odd */
  2180                     if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
  2181 		      !strchr("_-/.'`([{$",s[1]) || !s[1])
  2182 		    {
  2183                         if (pswit[ECHO_SWITCH])
  2184 			    printf("\n%s\n",aline);
  2185                         if (!pswit[OVERVIEW_SWITCH])
  2186                             printf("    Line %ld column %d - "
  2187 			      "Wrongspaced quotes?\n",linecnt,(int)(s-aline)+1);
  2188                         else
  2189                             cnt_punct++;
  2190 		    }
  2191 		}
  2192 	    }
  2193 	}
  2194 	if (*aline==CHAR_DQUOTE)
  2195 	{
  2196 	    if (strchr(",;:!?)]} ",aline[1]))
  2197 	    {
  2198 		if (pswit[ECHO_SWITCH])
  2199 		    printf("\n%s\n",aline);
  2200 		if (!pswit[OVERVIEW_SWITCH])
  2201 		    printf("    Line %ld column 1 - Wrongspaced quotes?\n",
  2202 		      linecnt);
  2203 		else
  2204 		    cnt_punct++;
  2205 	    }
  2206 	}
  2207         if (pswit[SQUOTE_SWITCH])
  2208 	{
  2209             for (s=aline;*s;s++)
  2210 	    {
  2211                 if ((*s==CHAR_SQUOTE || *s==CHAR_OPEN_SQUOTE) &&
  2212 		  (s==aline || s>aline && !gcisalpha(s[-1]) ||
  2213 		  !gcisalpha(s[1])))
  2214 		{
  2215                     if (!(squotepar=!squotepar))
  2216 		    {
  2217 			/* parity even */
  2218                         if (!strchr("_-.'`/\",;:!?)]} ",s[1]))
  2219 			{
  2220                             if (pswit[ECHO_SWITCH])
  2221 				printf("\n%s\n",aline);
  2222                             if (!pswit[OVERVIEW_SWITCH])
  2223                                 printf("    Line %ld column %d - "
  2224 				  "Wrongspaced singlequotes?\n",
  2225 				  linecnt,(int)(s-aline)+1);
  2226                             else
  2227                                 cnt_punct++;
  2228 			}
  2229 		    }
  2230                     else
  2231 		    {
  2232 			/* parity odd */
  2233                         if (!gcisalpha(s[1]) && !isdigit(s[1]) &&
  2234 			  !strchr("_-/\".'`",s[1]) || !s[1])
  2235 			{
  2236                             if (pswit[ECHO_SWITCH])
  2237 				printf("\n%s\n",aline);
  2238                             if (!pswit[OVERVIEW_SWITCH])
  2239                                 printf("    Line %ld column %d - "
  2240 				  "Wrongspaced singlequotes?\n",
  2241 				  linecnt,(int)(s-aline)+1);
  2242                             else
  2243                                 cnt_punct++;
  2244 			}
  2245 		    }
  2246 		}
  2247 	    }
  2248 	}
  2249         /*
  2250 	 * Look for double punctuation like ,. or ,,
  2251          * Thanks to DW for the suggestion!
  2252          * In books with references, ".," and ".;" are common
  2253          * e.g. "etc., etc.," and vol. 1.; vol 3.;
  2254          * OTOH, from my initial tests, there are also fairly
  2255          * common errors. What to do? Make these cases paranoid?
  2256          * ".," is the most common, so warnings->dotcomma is used
  2257          * to suppress detailed reporting if it occurs often.
  2258 	 */
  2259         llen=strlen(aline);
  2260         for (i=0;i<llen;i++)
  2261 	{
  2262 	    /* for each punctuation character in the line */
  2263             if (strchr(".?!,;:",aline[i]) && (strchr(".?!,;:",aline[i+1])) &&
  2264 	      aline[i] && aline[i+1])
  2265 	    {
  2266 		/* followed by punctuation, it's a query, unless . . . */
  2267                 if (aline[i]==aline[i+1] && (aline[i]=='.' || aline[i]=='?' ||
  2268 		  aline[i]=='!') ||
  2269 		  !warnings->dotcomma && aline[i]=='.' && aline[i+1]==',' ||
  2270 		  warnings->isFrench && !strncmp(aline+i,",...",4) ||
  2271 		  warnings->isFrench && !strncmp(aline+i,"...,",4) ||
  2272 		  warnings->isFrench && !strncmp(aline+i,";...",4) ||
  2273 		  warnings->isFrench && !strncmp(aline+i,"...;",4) ||
  2274 		  warnings->isFrench && !strncmp(aline+i,":...",4) ||
  2275 		  warnings->isFrench && !strncmp(aline+i,"...:",4) ||
  2276 		  warnings->isFrench && !strncmp(aline+i,"!...",4) ||
  2277 		  warnings->isFrench && !strncmp(aline+i,"...!",4) ||
  2278 		  warnings->isFrench && !strncmp(aline+i,"?...",4) ||
  2279 		  warnings->isFrench && !strncmp(aline+i,"...?",4))
  2280 		{
  2281 		    if (warnings->isFrench && !strncmp(aline+i,",...",4) ||
  2282 		      warnings->isFrench && !strncmp(aline+i,"...,",4) ||
  2283 		      warnings->isFrench && !strncmp(aline+i,";...",4) ||
  2284 		      warnings->isFrench && !strncmp(aline+i,"...;",4) ||
  2285 		      warnings->isFrench && !strncmp(aline+i,":...",4) ||
  2286 		      warnings->isFrench && !strncmp(aline+i,"...:",4) ||
  2287 		      warnings->isFrench && !strncmp(aline+i,"!...",4) ||
  2288 		      warnings->isFrench && !strncmp(aline+i,"...!",4) ||
  2289 		      warnings->isFrench && !strncmp(aline+i,"?...",4) ||
  2290 		      warnings->isFrench && !strncmp(aline+i,"...?",4))
  2291 			i+=4;
  2292 		    ; /* do nothing for .. !! and ?? which can be legit */
  2293 		}
  2294                 else
  2295 		{
  2296                     if (pswit[ECHO_SWITCH])
  2297 			printf("\n%s\n",aline);
  2298                     if (!pswit[OVERVIEW_SWITCH])
  2299                         printf("    Line %ld column %d - Double punctuation?\n",
  2300 			  linecnt,i+1);
  2301                     else
  2302                         cnt_punct++;
  2303 		}
  2304 	    }
  2305 	}
  2306         s=aline;
  2307         while (strstr(s," \" "))
  2308 	{
  2309             if (pswit[ECHO_SWITCH])
  2310 		printf("\n%s\n",aline);
  2311             if (!pswit[OVERVIEW_SWITCH])
  2312                 printf("    Line %ld column %d - Spaced doublequote?\n",
  2313 		  linecnt,(int)(strstr(s," \" ")-aline+1));
  2314             else
  2315                 cnt_punct++;
  2316             s=strstr(s," \" ")+2;
  2317 	}
  2318         s=aline;
  2319         while (strstr(s," ' "))
  2320 	{
  2321             if (pswit[ECHO_SWITCH])
  2322 		printf("\n%s\n",aline);
  2323             if (!pswit[OVERVIEW_SWITCH])
  2324                 printf("    Line %ld column %d - Spaced singlequote?\n",
  2325 		  linecnt,(int)(strstr(s," ' ")-aline+1));
  2326             else
  2327                 cnt_punct++;
  2328             s=strstr(s," ' ")+2;
  2329 	}
  2330         s=aline;
  2331         while (strstr(s," ` "))
  2332 	{
  2333             if (pswit[ECHO_SWITCH])
  2334 		printf("\n%s\n",aline);
  2335             if (!pswit[OVERVIEW_SWITCH])
  2336                 printf("    Line %ld column %d - Spaced singlequote?\n",
  2337 		  linecnt,(int)(strstr(s," ` ")-aline+1));
  2338             else
  2339                 cnt_punct++;
  2340             s=strstr(s," ` ")+2;
  2341 	}
  2342         /* check special case of 'S instead of 's at end of word */
  2343         s=aline+1;
  2344         while (*s)
  2345 	{
  2346             if (*s==CHAR_SQUOTE && s[1]=='S' && s[-1]>='a' && s[-1]<='z')
  2347 	    {
  2348                 if (pswit[ECHO_SWITCH])
  2349 		    printf("\n%s\n",aline);
  2350                 if (!pswit[OVERVIEW_SWITCH])
  2351                     printf("    Line %ld column %d - Capital \"S\"?\n",
  2352 		      linecnt,(int)(s-aline+2));
  2353                 else
  2354                     cnt_punct++;
  2355 	    }
  2356             s++;
  2357 	}
  2358         /*
  2359 	 * Now check special cases - start and end of line -
  2360          * for single and double quotes. Start is sometimes [sic]
  2361          * but better to query it anyway.
  2362          * While we're here, check for dash at end of line.
  2363 	 */
  2364         llen=strlen(aline);
  2365         if (llen>1)
  2366 	{
  2367             if (aline[llen-1]==CHAR_DQUOTE || aline[llen-1]==CHAR_SQUOTE ||
  2368 	      aline[llen-1]==CHAR_OPEN_SQUOTE)
  2369                 if (aline[llen-2]==CHAR_SPACE)
  2370 		{
  2371                     if (pswit[ECHO_SWITCH])
  2372 			printf("\n%s\n",aline);
  2373                     if (!pswit[OVERVIEW_SWITCH])
  2374                         printf("    Line %ld column %d - Spaced quote?\n",
  2375 			  linecnt,llen);
  2376                     else
  2377                         cnt_punct++;
  2378 		}
  2379             if ((aline[0]==CHAR_SQUOTE || aline[0]==CHAR_OPEN_SQUOTE) &&
  2380 	      aline[1]==CHAR_SPACE)
  2381 	    {
  2382 		if (pswit[ECHO_SWITCH])
  2383 		    printf("\n%s\n",aline);
  2384 		if (!pswit[OVERVIEW_SWITCH])
  2385 		    printf("    Line %ld column 1 - Spaced quote?\n",linecnt);
  2386 		else
  2387 		    cnt_punct++;
  2388 	    }
  2389             /*
  2390 	     * Dash at end of line may well be legit - paranoid mode only
  2391              * and don't report em-dash at line-end.
  2392 	     */
  2393             if (pswit[PARANOID_SWITCH] && warnings->hyphen)
  2394 	    {
  2395                 for (i=llen-1;i>0 && (unsigned char)aline[i]<=CHAR_SPACE;i--)
  2396 		    ;
  2397                 if (aline[i]=='-' && aline[i-1]!='-')
  2398 		{
  2399                     if (pswit[ECHO_SWITCH])
  2400 			printf("\n%s\n",aline);
  2401                     if (!pswit[OVERVIEW_SWITCH])
  2402                         printf("    Line %ld column %d - "
  2403 			  "Hyphen at end of line?\n",linecnt,i);
  2404 		}
  2405 	    }
  2406 	}
  2407         /*
  2408 	 * Brackets are often unspaced, but shouldn't be surrounded by alpha.
  2409          * If so, suspect a scanno like "a]most".
  2410 	 */
  2411         llen=strlen(aline);
  2412         for (i=1;i<llen-1;i++)
  2413 	{
  2414 	    /* for each bracket character in the line except 1st & last */
  2415             if (strchr("{[()]}",aline[i]) && gcisalpha(aline[i-1]) &&
  2416 	      gcisalpha(aline[i+1]))
  2417 	    {
  2418                 if (pswit[ECHO_SWITCH])
  2419 		    printf("\n%s\n",aline);
  2420                 if (!pswit[OVERVIEW_SWITCH])
  2421                     printf("    Line %ld column %d - Unspaced bracket?\n",
  2422 		      linecnt,i);
  2423                 else
  2424                     cnt_punct++;
  2425 	    }
  2426 	}
  2427         llen=strlen(aline);
  2428         if (warnings->endquote)
  2429 	{
  2430             for (i=1;i<llen;i++)
  2431 	    {
  2432 		/* for each character in the line except 1st */
  2433                 if (aline[i]==CHAR_DQUOTE && isalpha(aline[i-1]))
  2434 		{
  2435 		    if (pswit[ECHO_SWITCH])
  2436 			printf("\n%s\n",aline);
  2437 		    if (!pswit[OVERVIEW_SWITCH])
  2438 			printf("    Line %ld column %d - "
  2439 			  "endquote missing punctuation?\n",linecnt,i);
  2440 		    else
  2441 			cnt_punct++;
  2442 		}
  2443 	    }
  2444 	}
  2445 	/*
  2446          * Check for <HTML TAG>.
  2447          * If there is a < in the line, followed at some point
  2448          * by a > then we suspect HTML.
  2449 	 */
  2450         if (strstr(aline,"<") && strstr(aline,">"))
  2451 	{
  2452             i=(signed int)(strstr(aline,">")-strstr(aline,"<")+1);
  2453             if (i>0)
  2454 	    {
  2455                 strncpy(wrk,strstr(aline,"<"),i);
  2456                 wrk[i]=0;
  2457                 if (pswit[ECHO_SWITCH])
  2458 		    printf("\n%s\n",aline);
  2459                 if (!pswit[OVERVIEW_SWITCH])
  2460                     printf("    Line %ld column %d - HTML Tag? %s \n",
  2461 		      linecnt,(int)(strstr(aline,"<")-aline)+1,wrk);
  2462                 else
  2463                     cnt_html++;
  2464 	    }
  2465 	}
  2466         /*
  2467 	 * Check for &symbol; HTML.
  2468          * If there is a & in the line, followed at
  2469          * some point by a ; then we suspect HTML.
  2470 	 */
  2471         if (strstr(aline,"&") && strstr(aline,";"))
  2472 	{
  2473             i=(int)(strstr(aline,";")-strstr(aline,"&")+1);
  2474             for (s=strstr(aline,"&");s<strstr(aline,";");s++)   
  2475                 if (*s==CHAR_SPACE)
  2476 		    i=0;                /* Don't report "Jones & Son;" */
  2477             if (i>0)
  2478 	    {
  2479                 strncpy(wrk,strstr(aline,"&"),i);
  2480                 wrk[i]=0;
  2481                 if (pswit[ECHO_SWITCH])
  2482 		    printf("\n%s\n",aline);
  2483                 if (!pswit[OVERVIEW_SWITCH])
  2484                     printf("    Line %ld column %d - HTML symbol? %s \n",
  2485 		      linecnt,(int)(strstr(aline,"&")-aline)+1,wrk);
  2486                 else
  2487                     cnt_html++;
  2488 	    }
  2489 	}
  2490         /*
  2491 	 * At end of paragraph, check for mismatched quotes.
  2492          * We don't want to report an error immediately, since it is a
  2493          * common convention to omit the quotes at end of paragraph if
  2494          * the next paragraph is a continuation of the same speaker.
  2495          * Where this is the case, the next para should begin with a
  2496          * quote, so we store the warning message and only display it
  2497          * at the top of the next iteration if the new para doesn't
  2498          * start with a quote.
  2499          * The -p switch overrides this default, and warns of unclosed
  2500          * quotes on _every_ paragraph, whether the next begins with a
  2501          * quote or not.
  2502 	 */
  2503         if (isemptyline)
  2504 	{
  2505 	    /* end of para - add up the totals */
  2506             if (counters.quot%2)
  2507                 sprintf(dquote_err,"    Line %ld - Mismatched quotes\n",
  2508 		  linecnt);
  2509             if (pswit[SQUOTE_SWITCH] && counters.open_single_quote &&
  2510 	      counters.open_single_quote!=counters.close_single_quote)
  2511                 sprintf(squote_err,"    Line %ld - Mismatched singlequotes?\n",
  2512 		  linecnt);
  2513             if (pswit[SQUOTE_SWITCH] && counters.open_single_quote &&
  2514 	      counters.open_single_quote!=counters.close_single_quote &&
  2515 	      counters.open_single_quote!=counters.close_single_quote+1)
  2516 		/*
  2517 		 * Flag it to be noted regardless of the
  2518 		 * first char of the next para.
  2519 		 */
  2520                 squot=1;
  2521             if (counters.r_brack)
  2522                 sprintf(rbrack_err,"    Line %ld - "
  2523 		  "Mismatched round brackets?\n",linecnt);
  2524             if (counters.s_brack)
  2525                 sprintf(sbrack_err,"    Line %ld - "
  2526 		  "Mismatched square brackets?\n",linecnt);
  2527             if (counters.c_brack)
  2528                 sprintf(cbrack_err,"    Line %ld - "
  2529 		  "Mismatched curly brackets?\n",linecnt);
  2530             if (counters.c_unders%2)
  2531                 sprintf(unders_err,"    Line %ld - Mismatched underscores?\n",
  2532 		  linecnt);
  2533 	    memset(&counters,0,sizeof(counters));
  2534 	    /* let the next iteration know that it's starting a new para */
  2535             isnewpara=1;
  2536 	}
  2537         /*
  2538 	 * Check for omitted punctuation at end of paragraph by working back
  2539 	 * through prevline. DW.
  2540          * Need to check this only for "normal" paras.
  2541          * So what is a "normal" para?
  2542          *    Not normal if one-liner (chapter headings, etc.)
  2543          *    Not normal if doesn't contain at least one locase letter
  2544          *    Not normal if starts with space
  2545 	 */
  2546         if (isemptyline)
  2547 	{
  2548 	    /* end of para */
  2549             for (s=prevline,i=0;*s && !i;s++)
  2550                 if (gcisletter(*s))
  2551 		    /* use i to indicate the presence of a letter on the line */
  2552                     i=1;
  2553             /*
  2554 	     * This next "if" is a problem.
  2555              * If we say "start_para_line <= linecnt - 1", that includes
  2556 	     * one-line "paragraphs" like chapter heads. Lotsa false positives.
  2557              * If we say "start_para_line < linecnt - 1" it doesn't, but then it
  2558              * misses genuine one-line paragraphs.
  2559 	     */
  2560             if (i && last.blen>2 && start_para_line<linecnt-1 &&
  2561 	      *prevline>CHAR_SPACE)
  2562 	    {
  2563                 for (i=strlen(prevline)-1;
  2564 		  (prevline[i]==CHAR_DQUOTE || prevline[i]==CHAR_SQUOTE) &&
  2565 		  prevline[i]>CHAR_SPACE && i>0;
  2566 		  i--)
  2567 		    ;
  2568                 for (;i>0;i--)
  2569 		{
  2570                     if (gcisalpha(prevline[i]))
  2571 		    {
  2572                         if (pswit[ECHO_SWITCH])
  2573 			    printf("\n%s\n",prevline);
  2574                         if (!pswit[OVERVIEW_SWITCH])
  2575                             printf("    Line %ld column %d - "
  2576 			      "No punctuation at para end?\n",
  2577 			      linecnt-1,strlen(prevline));
  2578                         else
  2579                             cnt_punct++;
  2580                         break;
  2581 		    }
  2582                     if (strchr("-.:!([{?}])",prevline[i]))
  2583                         break;
  2584 		}
  2585 	    }
  2586 	}
  2587         strcpy(prevline,aline);
  2588     }
  2589     fclose(infile);
  2590     if (!pswit[OVERVIEW_SWITCH])
  2591         for (i=0;i<MAX_QWORD;i++)
  2592             if (dupcnt[i])
  2593                 printf("\nNote: Queried word %s was duplicated %d time%s\n",
  2594 		  qword[i],dupcnt[i],"s");
  2595 }
  2596 
  2597 /*
  2598  * flgets:
  2599  *
  2600  * Get one line from the input stream, checking for
  2601  * the existence of exactly one CR/LF line-end per line.
  2602  *
  2603  * Returns: a pointer to the line.
  2604  */
  2605 char *flgets(char *theline,int maxlen,FILE *thefile,long lcnt)
  2606 {
  2607     char c;
  2608     int len,isCR,cint;
  2609     *theline=0;
  2610     len=isCR=0;
  2611     c=cint=fgetc(thefile);
  2612     do
  2613     {
  2614         if (cint==EOF)
  2615             return NULL;
  2616 	/* either way, it's end of line */
  2617         if (c==10)
  2618 	{
  2619             if (isCR)
  2620                 break;
  2621             else
  2622 	    {
  2623 		/* Error - a LF without a preceding CR */
  2624                 if (pswit[LINE_END_SWITCH])
  2625 		{
  2626                     if (pswit[ECHO_SWITCH])
  2627 			printf("\n%s\n",theline);
  2628                     if (!pswit[OVERVIEW_SWITCH])
  2629                         printf("    Line %ld - No CR?\n",lcnt);
  2630                     else
  2631                         cnt_lineend++;
  2632 		}
  2633                 break;
  2634 	    }
  2635 	}
  2636         if (c==13)
  2637 	{
  2638             if (isCR)
  2639 	    {
  2640 		/* Error - two successive CRs */
  2641                 if (pswit[LINE_END_SWITCH])
  2642 		{
  2643                     if (pswit[ECHO_SWITCH])
  2644 			printf("\n%s\n",theline);
  2645                     if (!pswit[OVERVIEW_SWITCH])
  2646                         printf("    Line %ld - Two successive CRs?\n",lcnt);
  2647                     else
  2648                         cnt_lineend++;
  2649 		}
  2650 	    }
  2651             isCR=1;
  2652 	}
  2653         else
  2654 	{
  2655             if (pswit[LINE_END_SWITCH] && isCR)
  2656 	    {
  2657                 if (pswit[ECHO_SWITCH])
  2658 		    printf("\n%s\n",theline);
  2659                 if (!pswit[OVERVIEW_SWITCH])
  2660                     printf("    Line %ld column %d - CR without LF?\n",
  2661 		      lcnt,len+1);
  2662                 else
  2663                     cnt_lineend++;
  2664 	    }
  2665             theline[len]=c;
  2666             len++;
  2667             theline[len]=0;
  2668             isCR=0;
  2669 	}
  2670         c=cint=fgetc(thefile);
  2671     } while(len<maxlen);
  2672     if (pswit[MARKUP_SWITCH])  
  2673         postprocess_for_HTML(theline);
  2674     if (pswit[DP_SWITCH])  
  2675         postprocess_for_DP(theline);
  2676     return theline;
  2677 }
  2678 
  2679 /*
  2680  * mixdigit:
  2681  *
  2682  * Takes a "word" as a parameter, and checks whether it
  2683  * contains a mixture of alpha and digits. Generally, this is an
  2684  * error, but may not be for cases like 4th or L5 12s. 3d.
  2685  *
  2686  * Returns: 0 if no error found, 1 if error.
  2687  */
  2688 int mixdigit(char *checkword)
  2689 {
  2690     int wehaveadigit,wehavealetter,firstdigits,query,wl;
  2691     char *s;
  2692     wehaveadigit=wehavealetter=query=0;
  2693     for (s=checkword;*s;s++)
  2694         if (gcisalpha(*s))
  2695             wehavealetter=1;
  2696         else
  2697             if (gcisdigit(*s))
  2698                 wehaveadigit=1;
  2699     if (wehaveadigit && wehavealetter)
  2700     {
  2701 	/* Now exclude common legit cases, like "21st" and "12l. 3s. 11d." */
  2702         query=1;
  2703         wl=strlen(checkword);
  2704         for (firstdigits=0;gcisdigit(checkword[firstdigits]);firstdigits++)
  2705             ;
  2706         /* digits, ending in st, rd, nd, th of either case */
  2707         if (firstdigits+2==wl && (matchword(checkword+wl-2,"st") ||
  2708 	  matchword(checkword+wl-2,"rd") || matchword(checkword+wl-2,"nd") ||
  2709 	  matchword(checkword+wl-2,"th")))
  2710 	    query=0;
  2711         if (firstdigits+3==wl && (matchword(checkword+wl-3,"sts") ||
  2712 	  matchword(checkword+wl-3,"rds") || matchword(checkword+wl-3,"nds") ||
  2713 	  matchword(checkword+wl-3,"ths")))
  2714 	    query=0;
  2715         if (firstdigits+3==wl && (matchword(checkword+wl-4,"stly") ||
  2716 	  matchword(checkword+wl-4,"rdly") ||
  2717 	  matchword(checkword+wl-4,"ndly") || matchword(checkword+wl-4,"thly")))
  2718 	    query=0;
  2719         /* digits, ending in l, L, s or d */
  2720         if (firstdigits+1==wl && (checkword[wl-1]=='l' ||
  2721 	  checkword[wl-1]=='L' || checkword[wl-1]=='s' || checkword[wl-1]=='d'))
  2722 	    query=0;
  2723         /*
  2724 	 * L at the start of a number, representing Britsh pounds, like L500.
  2725          * This is cute. We know the current word is mixeddigit. If the first
  2726          * letter is L, there must be at least one digit following. If both
  2727          * digits and letters follow, we have a genuine error, else we have a
  2728          * capital L followed by digits, and we accept that as a non-error.
  2729 	 */
  2730         if (checkword[0]=='L' && !mixdigit(checkword+1))
  2731 	    query=0;
  2732     }
  2733     return query;
  2734 }
  2735 
  2736 /*
  2737  * getaword:
  2738  *
  2739  * Extracts the first/next "word" from the line, and puts
  2740  * it into "thisword". A word is defined as one English word unit--or
  2741  * at least that's the aim.
  2742  *
  2743  * Returns: a pointer to the position in the line where we will start
  2744  *          looking for the next word.
  2745  */
  2746 const char *getaword(const char *fromline,char *thisword)
  2747 {
  2748     int i,wordlen;
  2749     const char *s;
  2750     wordlen=0;
  2751     for (;!gcisdigit(*fromline) && !gcisalpha(*fromline) && *fromline;
  2752       fromline++)
  2753 	;
  2754     /*
  2755      * Use a look-ahead to handle exceptions for numbers like 1,000 and 1.35.
  2756      * Especially yucky is the case of L1,000
  2757      * This section looks for a pattern of characters including a digit
  2758      * followed by a comma or period followed by one or more digits.
  2759      * If found, it returns this whole pattern as a word; otherwise we discard
  2760      * the results and resume our normal programming.
  2761      */
  2762     s=fromline;
  2763     for (;(gcisdigit(*s) || gcisalpha(*s) || *s==',' || *s=='.') &&
  2764       wordlen<MAXWORDLEN;s++)
  2765     {
  2766 	thisword[wordlen]=*s;
  2767         wordlen++;
  2768     }
  2769     thisword[wordlen]=0;
  2770     for (i=1;i<wordlen-1;i++)
  2771     {
  2772         if (thisword[i]=='.' || thisword[i]==',')
  2773 	{
  2774             if (gcisdigit(thisword[i-1]) && gcisdigit(thisword[i-1]))
  2775 	    {
  2776                 fromline=s;
  2777                 return fromline;
  2778 	    }
  2779 	}
  2780     }
  2781     /* we didn't find a punctuated number - do the regular getword thing */
  2782     wordlen=0;
  2783     for (;(gcisdigit(*fromline) || gcisalpha(*fromline) || *fromline=='\'') &&
  2784       wordlen<MAXWORDLEN;fromline++)
  2785     {
  2786         thisword[wordlen]=*fromline;
  2787         wordlen++;
  2788     }
  2789     thisword[wordlen]=0;
  2790     return fromline;
  2791 }
  2792 
  2793 /*
  2794  * matchword:
  2795  *
  2796  * A case-insensitive string matcher.
  2797  */
  2798 int matchword(char *checkfor,char *thisword)
  2799 {
  2800     unsigned int ismatch,i;
  2801     if (strlen(checkfor)!=strlen(thisword))
  2802 	return 0;
  2803     ismatch=1;     /* assume a match until we find a difference */
  2804     for (i=0;i<strlen(checkfor);i++)
  2805         if (toupper(checkfor[i])!=toupper(thisword[i]))
  2806             ismatch=0;
  2807     return ismatch;
  2808 }
  2809 
  2810 /*
  2811  * lowerit:
  2812  *
  2813  * Lowercase the line.
  2814  */
  2815 
  2816 void lowerit(char *theline)
  2817 {
  2818     for (;*theline;theline++)
  2819         if (*theline>='A' && *theline<='Z')
  2820             *theline+=32;
  2821 }
  2822 
  2823 /*
  2824  * isroman:
  2825  *
  2826  * Is this word a Roman Numeral?
  2827  *
  2828  * It doesn't actually validate that the number is a valid Roman Numeral--for
  2829  * example it will pass MXXXXXXXXXX as a valid Roman Numeral, but that's not
  2830  * what we're here to do. If it passes this, it LOOKS like a Roman numeral.
  2831  * Anyway, the actual Romans were pretty tolerant of bad arithmetic, or
  2832  * expressions thereof, except when it came to taxes. Allow any number of M,
  2833  * an optional D, an optional CM or CD, any number of optional Cs, an optional
  2834  * XL or an optional XC, an optional IX or IV, an optional V and any number
  2835  * of optional Is.
  2836  */
  2837 int isroman(char *t)
  2838 {
  2839     char *s;
  2840     if (!t || !*t)
  2841 	return 0;
  2842     s=t;
  2843     while (*t=='m' && *t)
  2844 	t++;
  2845     if (*t=='d')
  2846 	t++;
  2847     if (*t=='c' && t[1]=='m')
  2848 	t+=2;
  2849     if (*t=='c' && t[1]=='d')
  2850 	t+=2;
  2851     while (*t=='c' && *t)
  2852 	t++;
  2853     if (*t=='x' && t[1]=='l')
  2854 	t+=2;
  2855     if (*t=='x' && t[1]=='c')
  2856 	t+=2;
  2857     if (*t=='l')
  2858 	t++;
  2859     while (*t=='x' && *t)
  2860 	t++;
  2861     if (*t=='i' && t[1]=='x')
  2862 	t+=2;
  2863     if (*t=='i' && t[1]=='v')
  2864 	t+=2;
  2865     if (*t=='v')
  2866 	t++;
  2867     while (*t=='i' && *t)
  2868 	t++;
  2869     return !*t;
  2870 }
  2871 
  2872 /*
  2873  * gcisalpha:
  2874  *
  2875  * A version of isalpha() that is somewhat lenient on 8-bit texts.
  2876  * If we use the standard function, 8-bit accented characters break
  2877  * words, so that tete with accented characters appears to be two words, "t"
  2878  * and "t", with 8-bit characters between them. This causes over-reporting of
  2879  * errors. gcisalpha() recognizes accented letters from the CP1252 (Windows)
  2880  * and ISO-8859-1 character sets, which are the most common PG 8-bit types.
  2881  */
  2882 int gcisalpha(unsigned char c)
  2883 {
  2884     if (c>='a' && c<='z')
  2885 	return 1;
  2886     if (c>='A' && c<='Z')
  2887 	return 1;
  2888     if (c<140)
  2889 	return 0;
  2890     if (c>=192 && c!=208 && c!=215 && c!=222 && c!=240 && c!=247 && c!=254)
  2891 	return 1;
  2892     if (c==140 || c==142 || c==156 || c==158 || c==159)
  2893 	return 1;
  2894     return 0;
  2895 }
  2896 
  2897 /*
  2898  * gcisdigit:
  2899  *
  2900  * A version of isdigit() that doesn't get confused in 8-bit texts.
  2901  */
  2902 int gcisdigit(unsigned char c)
  2903 {   
  2904     return c>='0' && c<='9';
  2905 }
  2906 
  2907 /*
  2908  * gcisletter:
  2909  *
  2910  * A version of isletter() that doesn't get confused in 8-bit texts.
  2911  * NB: this is ISO-8891-1-specific.
  2912  */
  2913 int gcisletter(unsigned char c)
  2914 {   
  2915     return c>='A' && c<='Z' || c>='a' && c<='z' || c>=192;
  2916 }
  2917 
  2918 /*
  2919  * gcstrchr:
  2920  *
  2921  * Wraps strchr to return NULL if the character being searched for is zero.
  2922  */
  2923 char *gcstrchr(char *s,char c)
  2924 {
  2925     if (!c)
  2926 	return NULL;
  2927     return strchr(s,c);
  2928 }
  2929 
  2930 /*
  2931  * postprocess_for_DP:
  2932  *
  2933  * Invoked with the -d switch from flgets().
  2934  * It simply "removes" from the line a hard-coded set of common
  2935  * DP-specific tags, so that the line passed to the main routine has
  2936  * been pre-cleaned of DP markup.
  2937  */
  2938 void postprocess_for_DP(char *theline)
  2939 {
  2940     char *s,*t;
  2941     int i;
  2942     if (!*theline) 
  2943         return;
  2944     for (i=0;*DPmarkup[i];i++)
  2945     {
  2946         s=strstr(theline,DPmarkup[i]);
  2947         while (s)
  2948 	{
  2949             t=s+strlen(DPmarkup[i]);
  2950             while (*t)
  2951 	    {
  2952                 *s=*t;
  2953                 t++;
  2954 		s++;
  2955 	    }
  2956             *s=0;
  2957             s=strstr(theline,DPmarkup[i]);
  2958 	}
  2959     }
  2960 }
  2961 
  2962 /*
  2963  * postprocess_for_HTML:
  2964  *
  2965  * Invoked with the -m switch from flgets().
  2966  * It simply "removes" from the line a hard-coded set of common
  2967  * HTML tags and "replaces" a hard-coded set of common HTML
  2968  * entities, so that the line passed to the main routine has
  2969  * been pre-cleaned of HTML.
  2970  */
  2971 void postprocess_for_HTML(char *theline)
  2972 {
  2973     if (strstr(theline,"<") && strstr(theline,">"))
  2974         while (losemarkup(theline))
  2975             ;
  2976     while (loseentities(theline))
  2977         ;
  2978 }
  2979 
  2980 char *losemarkup(char *theline)
  2981 {
  2982     char *s,*t;
  2983     int i;
  2984     if (!*theline) 
  2985         return NULL;
  2986     s=strstr(theline,"<");
  2987     t=strstr(theline,">");
  2988     if (!s || !t)
  2989 	return NULL;
  2990     for (i=0;*markup[i];i++)
  2991         if (!tagcomp(s+1,markup[i]))
  2992 	{
  2993             if (!t[1])
  2994 	    {
  2995                 *s=0;
  2996                 return s;
  2997 	    }
  2998             else if (t>s)
  2999 	    {
  3000 		strcpy(s,t+1);
  3001 		return s;
  3002 	    }
  3003         }
  3004     /* It's an unrecognized <xxx>. */
  3005     return NULL;
  3006 }
  3007 
  3008 char *loseentities(char *theline)
  3009 {
  3010     int i;
  3011     char *s,*t;
  3012     if (!*theline) 
  3013         return NULL;
  3014     for (i=0;*entities[i].htmlent;i++)
  3015     {
  3016         s=strstr(theline,entities[i].htmlent);
  3017         if (s)
  3018 	{
  3019             t=malloc((size_t)strlen(s));
  3020             if (!t)
  3021 		return NULL;
  3022             strcpy(t,s+strlen(entities[i].htmlent));
  3023             strcpy(s,entities[i].textent);
  3024             strcat(s,t);
  3025             free(t);
  3026             return theline;
  3027 	}
  3028     }
  3029     for (i=0;*entities[i].htmlnum;i++)
  3030     {
  3031         s=strstr(theline,entities[i].htmlnum);
  3032         if (s)
  3033 	{
  3034             t=malloc((size_t)strlen(s));
  3035             if (!t)
  3036 		return NULL;
  3037             strcpy(t,s+strlen(entities[i].htmlnum));
  3038             strcpy(s,entities[i].textent);
  3039             strcat(s,t);
  3040             free(t);
  3041             return theline;
  3042 	}
  3043     }
  3044     return NULL;
  3045 }
  3046 
  3047 int tagcomp(char *strin,char *basetag)
  3048 {
  3049     char *s,*t;
  3050     s=basetag;
  3051     t=strin;
  3052     if (*t=='/')
  3053 	t++; /* ignore a slash */
  3054     while (*s && *t)
  3055     {
  3056         if (tolower(*s)!=tolower(*t))
  3057 	    return 1;
  3058         s++;
  3059 	t++;
  3060     }
  3061     return 0;
  3062 }
  3063 
  3064 void proghelp()
  3065 {
  3066     fputs("Bookloupe version " PACKAGE_VERSION ".\n",stderr);
  3067     fputs("Copyright 2000-2005 Jim Tinsley <jtinsley@pobox.com>.\n",stderr);
  3068     fputs("Copyright 2012- J. Ali Harlow <ali@juiblex.co.uk>.\n",stderr);
  3069     fputs("Bookloupe comes wih ABSOLUTELY NO WARRANTY. "
  3070       "For details, read the file COPYING.\n",stderr);
  3071     fputs("This is Free Software; "
  3072       "you may redistribute it under certain conditions (GPL);\n",stderr);
  3073     fputs("read the file COPYING for details.\n\n",stderr);
  3074     fputs("Usage is: bookloupe [-setpxloyhud] filename\n",stderr);
  3075     fputs("  where -s checks single quotes, -e suppresses echoing lines, "
  3076       "-t checks typos\n",stderr);
  3077     fputs("  -x (paranoid) switches OFF -t and extra checks, "
  3078       "-l turns OFF line-end checks\n",stderr);
  3079     fputs("  -o just displays overview without detail, "
  3080       "-h echoes header fields\n",stderr);
  3081     fputs("  -v (verbose) unsuppresses duplicate reporting, "
  3082       "-m suppresses markup\n",stderr);
  3083     fputs("  -d ignores DP-specific markup,\n",stderr);
  3084     fputs("  -u uses a file gutcheck.typ to query user-defined "
  3085       "possible typos\n",stderr);
  3086     fputs("Sample usage: bookloupe warpeace.txt \n",stderr);
  3087     fputs("\n",stderr);
  3088     fputs("Bookloupe looks for errors in Project Gutenberg(TM) etexts.\n",
  3089       stderr);
  3090     fputs("Bookloupe queries anything it thinks shouldn't be in a PG text; "
  3091       "non-ASCII\n",stderr);
  3092     fputs("characters like accented letters, "
  3093       "lines longer than 75 or shorter than 55,\n",stderr);
  3094     fputs("unbalanced quotes or brackets, "
  3095       "a variety of badly formatted punctuation, \n",stderr);
  3096     fputs("HTML tags, some likely typos. "
  3097       "It is NOT a substitute for human judgement.\n",stderr);
  3098     fputs("\n",stderr);
  3099 }